diff options
Diffstat (limited to 'tools')
171 files changed, 6343 insertions, 1971 deletions
diff --git a/tools/build/Build.include b/tools/build/Build.include new file mode 100644 index 000000000000..4c8daaccb82a --- /dev/null +++ b/tools/build/Build.include @@ -0,0 +1,81 @@ +### +# build: Generic definitions +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +### +# Convenient variables +comma := , +squote := ' + +### +# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o +dot-target = $(dir $@).$(notdir $@) + +### +# filename of target with directory and extension stripped +basetarget = $(basename $(notdir $@)) + +### +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(dot-target).d) + +### +# Check if both arguments has same arguments. Result is empty string if equal. +arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \ + $(filter-out $(cmd_$@), $(cmd_$(1))) ) + +### +# Escape single quote for use in echo statements +escsq = $(subst $(squote),'\$(squote)',$1) + +# Echo command +# Short version is used, if $(quiet) equals `quiet_', otherwise full one. +echo-cmd = $(if $($(quiet)cmd_$(1)),\ + echo ' $(call escsq,$($(quiet)cmd_$(1)))';) + +### +# Replace >$< with >$$< to preserve $ when reloading the .cmd file +# (needed for make) +# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# (needed for make) +# Replace >'< with >'\''< to be able to enclose the whole string in '...' +# (needed for the shell) +make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) + +### +# Find any prerequisites that is newer than target or that does not exist. +# PHONY targets skipped in both cases. +any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) + +### +# if_changed_dep - execute command if any prerequisite is newer than +# target, or command line has changed and update +# dependencies in the cmd file +if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + cat $(depfile) > $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + +# if_changed - execute command if any prerequisite is newer than +# target, or command line has changed +if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) + +### +# C flags to be used in rule definitions, includes: +# - depfile generation +# - global $(CFLAGS) +# - per target C flags +# - per object C flags +# - BUILD_STR macro to allow '-D"$(variable)"' constructs +c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt new file mode 100644 index 000000000000..00ad2d608727 --- /dev/null +++ b/tools/build/Documentation/Build.txt @@ -0,0 +1,139 @@ +Build Framework +=============== + +The perf build framework was adopted from the kernel build system, hence the +idea and the way how objects are built is the same. + +Basically the user provides set of 'Build' files that list objects and +directories to nest for specific target to be build. + +Unlike the kernel we don't have a single build object 'obj-y' list that where +we setup source objects, but we support more. This allows one 'Build' file to +carry a sources list for multiple build objects. + +a) Build framework makefiles +---------------------------- + +The build framework consists of 2 Makefiles: + + Build.include + Makefile.build + +While the 'Build.include' file contains just some generic definitions, the +'Makefile.build' file is the makefile used from the outside. It's +interface/usage is following: + + $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + +where: + + KSRC - is the path to kernel sources + DIR - is the path to the project to be built + OBJECT - is the name of the build object + +When succefully finished the $(DIR) directory contains the final object file +called $(OBJECT)-in.o: + + $ ls $(DIR)/$(OBJECT)-in.o + +which includes all compiled sources described in 'Build' makefiles. + +a) Build makefiles +------------------ + +The user supplies 'Build' makefiles that contains a objects list, and connects +the build to nested directories. + +Assume we have the following project structure: + + ex/a.c + /b.c + /c.c + /d.c + /arch/e.c + /arch/f.c + +Out of which you build the 'ex' binary ' and the 'libex.a' library: + + 'ex' - consists of 'a.o', 'b.o' and libex.a + 'libex.a' - consists of 'c.o', 'd.o', 'e.o' and 'f.o' + +The build framework does not create the 'ex' and 'libex.a' binaries for you, it +only prepares proper objects to be compiled and grouped together. + +To follow the above example, the user provides following 'Build' files: + + ex/Build: + ex-y += a.o + ex-y += b.o + + libex-y += c.o + libex-y += d.o + libex-y += arch/ + + ex/arch/Build: + libex-y += e.o + libex-y += f.o + +and runs: + + $ make -f tools/build/Makefile.build dir=. obj=ex + $ make -f tools/build/Makefile.build dir=. obj=libex + +which creates the following objects: + + ex/ex-in.o + ex/libex-in.o + +that contain request objects names in Build files. + +It's only a matter of 2 single commands to create the final binaries: + + $ ar rcs libex.a libex-in.o + $ gcc -o ex ex-in.o libex.a + +You can check the 'ex' example in 'tools/build/tests/ex' for more details. + +b) Rules +-------- + +The build framework provides standard compilation rules to handle .S and .c +compilation. + +It's possible to include special rule if needed (like we do for flex or bison +code generation). + +c) CFLAGS +--------- + +It's possible to alter the standard object C flags in the following way: + + CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object + CFLAGS_gtk += '...' - alters CFLAGS for gtk build object + +This C flags changes has the scope of the Build makefile they are defined in. + + +d) Dependencies +--------------- + +For each built object file 'a.o' the '.a.cmd' is created and holds: + + - Command line used to built that object + (for each object) + + - Dependency rules generated by 'gcc -Wp,-MD,...' + (for compiled object) + +All existing '.cmd' files are included in the Build process to follow properly +the dependencies and trigger a rebuild when necessary. + + +e) Single rules +--------------- + +It's possible to build single object file by choice, like: + + $ make util/map.o # objects + $ make util/map.i # preprocessor + $ make util/map.s # assembly diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build new file mode 100644 index 000000000000..10df57237a66 --- /dev/null +++ b/tools/build/Makefile.build @@ -0,0 +1,130 @@ +### +# Main build makefile. +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +PHONY := __build +__build: + +ifeq ($(V),1) + quiet = + Q = +else + quiet=quiet_ + Q=@ +endif + +build-dir := $(srctree)/tools/build + +# Generic definitions +include $(build-dir)/Build.include + +# do not force detected configuration +-include .config-detected + +# Init all relevant variables used in build files so +# 1) they have correct type +# 2) they do not inherit any value from the environment +subdir-y := +obj-y := +subdir-y := +subdir-obj-y := + +# Build definitions +build-file := $(dir)/Build +include $(build-file) + +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ + +# Create directory unless it exists +quiet_cmd_mkdir = MKDIR $(dir $@) + cmd_mkdir = mkdir -p $(dir $@) + rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) + +# Compile command +quiet_cmd_cc_o_c = CC $@ + cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< + +quiet_cmd_cc_i_c = CPP $@ + cmd_cc_i_c = $(CC) $(c_flags) -E -o $@ $< + +quiet_cmd_cc_s_c = AS $@ + cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< + +# Link agregate command +# If there's nothing to link, create empty $@ object. +quiet_cmd_ld_multi = LD $@ + cmd_ld_multi = $(if $(strip $(obj-y)),\ + $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + +# Build rules +$(OUTPUT)%.o: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.o: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.i: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.i: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.s: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_s_c) + +# Gather build data: +# obj-y - list of build objects +# subdir-y - list of directories to nest +# subdir-obj-y - list of directories objects 'dir/$(obj)-in.o' +obj-y := $($(obj)-y) +subdir-y := $(patsubst %/,%,$(filter %/, $(obj-y))) +obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) +subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) + +# '$(OUTPUT)/dir' prefix to all objects +prefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(prefix),$(obj-y)) +subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) + +# Final '$(obj)-in.o' object +in-target := $(prefix)$(obj)-in.o + +PHONY += $(subdir-y) + +$(subdir-y): + $(Q)$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj) + +$(sort $(subdir-obj-y)): $(subdir-y) ; + +$(in-target): $(obj-y) FORCE + $(call rule_mkdir) + $(call if_changed,ld_multi) + +__build: $(in-target) + @: + +PHONY += FORCE +FORCE: + +# Include all cmd files to get all the dependency rules +# for all objects included +targets := $(wildcard $(sort $(obj-y) $(in-target) $(MAKECMDGOALS))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif + +.PHONY: $(PHONY) diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build new file mode 100644 index 000000000000..0e6c3e6767e6 --- /dev/null +++ b/tools/build/tests/ex/Build @@ -0,0 +1,8 @@ +ex-y += ex.o +ex-y += a.o +ex-y += b.o +ex-y += empty/ + +libex-y += c.o +libex-y += d.o +libex-y += arch/ diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile new file mode 100644 index 000000000000..52d2476073a3 --- /dev/null +++ b/tools/build/tests/ex/Makefile @@ -0,0 +1,23 @@ +export srctree := ../../../.. +export CC := gcc +export LD := ld +export AR := ar + +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +ex: ex-in.o libex-in.o + gcc -o $@ $^ + +ex.%: FORCE + make -f $(srctree)/tools/build/Makefile.build dir=. $@ + +ex-in.o: FORCE + make $(build)=ex + +libex-in.o: FORCE + make $(build)=libex + +clean: + find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + rm -f ex ex.i ex.s + +.PHONY: FORCE diff --git a/tools/build/tests/ex/a.c b/tools/build/tests/ex/a.c new file mode 100644 index 000000000000..851762798c83 --- /dev/null +++ b/tools/build/tests/ex/a.c @@ -0,0 +1,5 @@ + +int a(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/Build b/tools/build/tests/ex/arch/Build new file mode 100644 index 000000000000..55506189efae --- /dev/null +++ b/tools/build/tests/ex/arch/Build @@ -0,0 +1,2 @@ +libex-y += e.o +libex-y += f.o diff --git a/tools/build/tests/ex/arch/e.c b/tools/build/tests/ex/arch/e.c new file mode 100644 index 000000000000..beaa4a1d7ba8 --- /dev/null +++ b/tools/build/tests/ex/arch/e.c @@ -0,0 +1,5 @@ + +int e(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/f.c b/tools/build/tests/ex/arch/f.c new file mode 100644 index 000000000000..7c3e9e9da5b7 --- /dev/null +++ b/tools/build/tests/ex/arch/f.c @@ -0,0 +1,5 @@ + +int f(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/b.c b/tools/build/tests/ex/b.c new file mode 100644 index 000000000000..c24ff9ca9a97 --- /dev/null +++ b/tools/build/tests/ex/b.c @@ -0,0 +1,5 @@ + +int b(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/c.c b/tools/build/tests/ex/c.c new file mode 100644 index 000000000000..e216d0217499 --- /dev/null +++ b/tools/build/tests/ex/c.c @@ -0,0 +1,5 @@ + +int c(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/d.c b/tools/build/tests/ex/d.c new file mode 100644 index 000000000000..80dc0f06151b --- /dev/null +++ b/tools/build/tests/ex/d.c @@ -0,0 +1,5 @@ + +int d(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/empty/Build b/tools/build/tests/ex/empty/Build new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/build/tests/ex/empty/Build diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c new file mode 100644 index 000000000000..dc42eb2e1a67 --- /dev/null +++ b/tools/build/tests/ex/ex.c @@ -0,0 +1,19 @@ + +int a(void); +int b(void); +int c(void); +int d(void); +int e(void); +int f(void); + +int main(void) +{ + a(); + b(); + c(); + d(); + e(); + f(); + + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh new file mode 100755 index 000000000000..5494f8ea7567 --- /dev/null +++ b/tools/build/tests/run.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +function test_ex { + make -C ex V=1 clean > ex.out 2>&1 + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + +function test_ex_suffix { + make -C ex V=1 clean > ex.out 2>&1 + + # use -rR to disable make's builtin rules + make -rR -C ex V=1 ex.o >> ex.out 2>&1 + make -rR -C ex V=1 ex.i >> ex.out 2>&1 + make -rR -C ex V=1 ex.s >> ex.out 2>&1 + + if [ -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + if [ ! -f ./ex/ex.o -o ! -f ./ex/ex.i -o ! -f ./ex/ex.s ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} +echo -n Testing.. + +test_ex +test_ex_suffix + +echo OK diff --git a/tools/hv/Makefile b/tools/hv/Makefile index bd22f786a60c..99ffe61051a7 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -5,9 +5,9 @@ PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) -all: hv_kvp_daemon hv_vss_daemon +all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon %: %.c $(CC) $(CFLAGS) -o $@ $^ clean: - $(RM) hv_kvp_daemon hv_vss_daemon + $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index f437d739f37d..9445d8f264a4 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -43,15 +43,9 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg) int error = HV_E_FAIL; char *q, *p; - /* - * If possile append a path seperator to the path. - */ - if (strlen((char *)smsg->path_name) < (W_MAX_PATH - 2)) - strcat((char *)smsg->path_name, "/"); - p = (char *)smsg->path_name; snprintf(target_fname, sizeof(target_fname), "%s/%s", - (char *)smsg->path_name, smsg->file_name); + (char *)smsg->path_name, (char *)smsg->file_name); syslog(LOG_INFO, "Target file name: %s", target_fname); /* @@ -137,7 +131,7 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int fd, fcopy_fd, len; + int fcopy_fd, len; int error; int daemonize = 1, long_index = 0, opt; int version = FCOPY_CURRENT_VERSION; diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 6a6432a20a1d..408bb076a234 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -147,7 +147,6 @@ static void kvp_release_lock(int pool) static void kvp_update_file(int pool) { FILE *filep; - size_t bytes_written; /* * We are going to write our in-memory registry out to @@ -163,8 +162,7 @@ static void kvp_update_file(int pool) exit(EXIT_FAILURE); } - bytes_written = fwrite(kvp_file_info[pool].records, - sizeof(struct kvp_record), + fwrite(kvp_file_info[pool].records, sizeof(struct kvp_record), kvp_file_info[pool].num_records, filep); if (ferror(filep) || fclose(filep)) { @@ -310,7 +308,7 @@ static int kvp_file_init(void) return 0; } -static int kvp_key_delete(int pool, const char *key, int key_size) +static int kvp_key_delete(int pool, const __u8 *key, int key_size) { int i; int j, k; @@ -353,8 +351,8 @@ static int kvp_key_delete(int pool, const char *key, int key_size) return 1; } -static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const char *value, - int value_size) +static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, + const __u8 *value, int value_size) { int i; int num_records; @@ -407,7 +405,7 @@ static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const return 0; } -static int kvp_get_value(int pool, const char *key, int key_size, char *value, +static int kvp_get_value(int pool, const __u8 *key, int key_size, __u8 *value, int value_size) { int i; @@ -439,8 +437,8 @@ static int kvp_get_value(int pool, const char *key, int key_size, char *value, return 1; } -static int kvp_pool_enumerate(int pool, int index, char *key, int key_size, - char *value, int value_size) +static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, + __u8 *value, int value_size) { struct kvp_record *record; @@ -661,7 +659,7 @@ static char *kvp_if_name_to_mac(char *if_name) char *p, *x; char buf[256]; char addr_file[256]; - int i; + unsigned int i; char *mac_addr = NULL; snprintf(addr_file, sizeof(addr_file), "%s%s%s", "/sys/class/net/", @@ -700,7 +698,7 @@ static char *kvp_mac_to_if_name(char *mac) char buf[256]; char *kvp_net_dir = "/sys/class/net/"; char dev_id[256]; - int i; + unsigned int i; dir = opendir(kvp_net_dir); if (dir == NULL) @@ -750,7 +748,7 @@ static char *kvp_mac_to_if_name(char *mac) static void kvp_process_ipconfig_file(char *cmd, - char *config_buf, int len, + char *config_buf, unsigned int len, int element_size, int offset) { char buf[256]; @@ -768,7 +766,7 @@ static void kvp_process_ipconfig_file(char *cmd, if (offset == 0) memset(config_buf, 0, len); while ((p = fgets(buf, sizeof(buf), file)) != NULL) { - if ((len - strlen(config_buf)) < (element_size + 1)) + if (len < strlen(config_buf) + element_size + 1) break; x = strchr(p, '\n'); @@ -916,7 +914,7 @@ static int kvp_process_ip_address(void *addrp, static int kvp_get_ip_info(int family, char *if_name, int op, - void *out_buffer, int length) + void *out_buffer, unsigned int length) { struct ifaddrs *ifap; struct ifaddrs *curp; @@ -1019,8 +1017,7 @@ kvp_get_ip_info(int family, char *if_name, int op, weight += hweight32(&w[i]); sprintf(cidr_mask, "/%d", weight); - if ((length - sn_offset) < - (strlen(cidr_mask) + 1)) + if (length < sn_offset + strlen(cidr_mask) + 1) goto gather_ipaddr; if (sn_offset == 0) @@ -1308,16 +1305,17 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; + /* + * The dhcp_enabled flag is only for IPv4. In the case the host only + * injects an IPv6 address, the flag is true, but we still need to + * proceed to parse and pass the IPv6 information to the + * disto-specific script hv_set_ifconfig. + */ if (new_val->dhcp_enabled) { error = kvp_write_file(file, "BOOTPROTO", "", "dhcp"); if (error) goto setval_error; - /* - * We are done!. - */ - goto setval_done; - } else { error = kvp_write_file(file, "BOOTPROTO", "", "none"); if (error) @@ -1345,7 +1343,6 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; -setval_done: fclose(file); /* diff --git a/tools/lguest/Makefile b/tools/lguest/Makefile index 97bca4871ea3..a107b5e4da13 100644 --- a/tools/lguest/Makefile +++ b/tools/lguest/Makefile @@ -1,7 +1,13 @@ # This creates the demonstration utility "lguest" which runs a Linux guest. -CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE +CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE -Iinclude all: lguest +include/linux/virtio_types.h: ../../include/uapi/linux/virtio_types.h + mkdir -p include/linux 2>&1 || true + ln -sf ../../../../include/uapi/linux/virtio_types.h $@ + +lguest: include/linux/virtio_types.h + clean: rm -f lguest diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 32cf2ce15d69..e44052483ed9 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -41,6 +41,8 @@ #include <signal.h> #include <pwd.h> #include <grp.h> +#include <sys/user.h> +#include <linux/pci_regs.h> #ifndef VIRTIO_F_ANY_LAYOUT #define VIRTIO_F_ANY_LAYOUT 27 @@ -61,12 +63,19 @@ typedef uint16_t u16; typedef uint8_t u8; /*:*/ -#include <linux/virtio_config.h> -#include <linux/virtio_net.h> -#include <linux/virtio_blk.h> -#include <linux/virtio_console.h> -#include <linux/virtio_rng.h> +#define VIRTIO_CONFIG_NO_LEGACY +#define VIRTIO_PCI_NO_LEGACY +#define VIRTIO_BLK_NO_LEGACY +#define VIRTIO_NET_NO_LEGACY + +/* Use in-kernel ones, which defines VIRTIO_F_VERSION_1 */ +#include "../../include/uapi/linux/virtio_config.h" +#include "../../include/uapi/linux/virtio_net.h" +#include "../../include/uapi/linux/virtio_blk.h" +#include "../../include/uapi/linux/virtio_console.h" +#include "../../include/uapi/linux/virtio_rng.h" #include <linux/virtio_ring.h> +#include "../../include/uapi/linux/virtio_pci.h" #include <asm/bootparam.h> #include "../../include/linux/lguest_launcher.h" @@ -91,13 +100,16 @@ static bool verbose; /* The pointer to the start of guest memory. */ static void *guest_base; /* The maximum guest physical address allowed, and maximum possible. */ -static unsigned long guest_limit, guest_max; +static unsigned long guest_limit, guest_max, guest_mmio; /* The /dev/lguest file descriptor. */ static int lguest_fd; /* a per-cpu variable indicating whose vcpu is currently running */ static unsigned int __thread cpu_id; +/* 5 bit device number in the PCI_CONFIG_ADDR => 32 only */ +#define MAX_PCI_DEVICES 32 + /* This is our list of devices. */ struct device_list { /* Counter to assign interrupt numbers. */ @@ -106,30 +118,50 @@ struct device_list { /* Counter to print out convenient device numbers. */ unsigned int device_num; - /* The descriptor page for the devices. */ - u8 *descpage; - - /* A single linked list of devices. */ - struct device *dev; - /* And a pointer to the last device for easy append. */ - struct device *lastdev; + /* PCI devices. */ + struct device *pci[MAX_PCI_DEVICES]; }; /* The list of Guest devices, based on command line arguments. */ static struct device_list devices; -/* The device structure describes a single device. */ -struct device { - /* The linked-list pointer. */ - struct device *next; +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + u32 pci_cfg_data; /* Data for BAR access. */ +}; - /* The device's descriptor, as mapped into the Guest. */ - struct lguest_device_desc *desc; +struct virtio_pci_mmio { + struct virtio_pci_common_cfg cfg; + u16 notify; + u8 isr; + u8 padding; + /* Device-specific configuration follows this. */ +}; - /* We can't trust desc values once Guest has booted: we use these. */ - unsigned int feature_len; - unsigned int num_vq; +/* This is the layout (little-endian) of the PCI config space. */ +struct pci_config { + u16 vendor_id, device_id; + u16 command, status; + u8 revid, prog_if, subclass, class; + u8 cacheline_size, lat_timer, header_type, bist; + u32 bar[6]; + u32 cardbus_cis_ptr; + u16 subsystem_vendor_id, subsystem_device_id; + u32 expansion_rom_addr; + u8 capabilities, reserved1[3]; + u32 reserved2; + u8 irq_line, irq_pin, min_grant, max_latency; + + /* Now, this is the linked capability list. */ + struct virtio_pci_cap common; + struct virtio_pci_notify_cap notify; + struct virtio_pci_cap isr; + struct virtio_pci_cap device; + struct virtio_pci_cfg_cap cfg_access; +}; +/* The device structure describes a single device. */ +struct device { /* The name of this device, for --verbose. */ const char *name; @@ -139,6 +171,25 @@ struct device { /* Is it operational */ bool running; + /* Has it written FEATURES_OK but not re-checked it? */ + bool wrote_features_ok; + + /* PCI configuration */ + union { + struct pci_config config; + u32 config_words[sizeof(struct pci_config) / sizeof(u32)]; + }; + + /* Features we offer, and those accepted. */ + u64 features, features_accepted; + + /* Device-specific config hangs off the end of this. */ + struct virtio_pci_mmio *mmio; + + /* PCI MMIO resources (all in BAR0) */ + size_t mmio_size; + u32 mmio_addr; + /* Device-specific data. */ void *priv; }; @@ -150,12 +201,15 @@ struct virtqueue { /* Which device owns me. */ struct device *dev; - /* The configuration for this queue. */ - struct lguest_vqconfig config; + /* Name for printing errors. */ + const char *name; /* The actual ring of buffers. */ struct vring vring; + /* The information about this virtqueue (we only use queue_size on) */ + struct virtio_pci_common_cfg pci_config; + /* Last available index we saw. */ u16 last_avail_idx; @@ -199,6 +253,16 @@ static struct termios orig_term; #define le32_to_cpu(v32) (v32) #define le64_to_cpu(v64) (v64) +/* + * A real device would ignore weird/non-compliant driver behaviour. We + * stop and flag it, to help debugging Linux problems. + */ +#define bad_driver(d, fmt, ...) \ + errx(1, "%s: bad driver: " fmt, (d)->name, ## __VA_ARGS__) +#define bad_driver_vq(vq, fmt, ...) \ + errx(1, "%s vq %s: bad driver: " fmt, (vq)->dev->name, \ + vq->name, ## __VA_ARGS__) + /* Is this iovec empty? */ static bool iov_empty(const struct iovec iov[], unsigned int num_iov) { @@ -211,7 +275,8 @@ static bool iov_empty(const struct iovec iov[], unsigned int num_iov) } /* Take len bytes from the front of this iovec. */ -static void iov_consume(struct iovec iov[], unsigned num_iov, +static void iov_consume(struct device *d, + struct iovec iov[], unsigned num_iov, void *dest, unsigned len) { unsigned int i; @@ -229,14 +294,7 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, len -= used; } if (len != 0) - errx(1, "iovec too short!"); -} - -/* The device virtqueue descriptors are followed by feature bitmasks. */ -static u8 *get_feature_bits(struct device *dev) -{ - return (u8 *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig); + bad_driver(d, "iovec too short!"); } /*L:100 @@ -309,14 +367,20 @@ static void *map_zeroed_pages(unsigned int num) return addr + getpagesize(); } -/* Get some more pages for a device. */ -static void *get_pages(unsigned int num) +/* Get some bytes which won't be mapped into the guest. */ +static unsigned long get_mmio_region(size_t size) { - void *addr = from_guest_phys(guest_limit); + unsigned long addr = guest_mmio; + size_t i; + + if (!size) + return addr; + + /* Size has to be a power of 2 (and multiple of 16) */ + for (i = 1; i < size; i <<= 1); + + guest_mmio += i; - guest_limit += num * getpagesize(); - if (guest_limit > guest_max) - errx(1, "Not enough memory for devices"); return addr; } @@ -547,9 +611,11 @@ static void tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, (unsigned long)guest_base, - guest_limit / getpagesize(), start }; - verbose("Guest: %p - %p (%#lx)\n", - guest_base, guest_base + guest_limit, guest_limit); + guest_limit / getpagesize(), start, + (guest_mmio+getpagesize()-1) / getpagesize() }; + verbose("Guest: %p - %p (%#lx, MMIO %#lx)\n", + guest_base, guest_base + guest_limit, + guest_limit, guest_mmio); lguest_fd = open_or_die("/dev/lguest", O_RDWR); if (write(lguest_fd, args, sizeof(args)) < 0) err(1, "Writing to /dev/lguest"); @@ -564,7 +630,8 @@ static void tell_kernel(unsigned long start) * we have a convenient routine which checks it and exits with an error message * if something funny is going on: */ -static void *_check_pointer(unsigned long addr, unsigned int size, +static void *_check_pointer(struct device *d, + unsigned long addr, unsigned int size, unsigned int line) { /* @@ -572,7 +639,8 @@ static void *_check_pointer(unsigned long addr, unsigned int size, * or addr + size wraps around. */ if ((addr + size) > guest_limit || (addr + size) < addr) - errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); + bad_driver(d, "%s:%i: Invalid address %#lx", + __FILE__, line, addr); /* * We return a pointer for the caller's convenience, now we know it's * safe to use. @@ -580,14 +648,14 @@ static void *_check_pointer(unsigned long addr, unsigned int size, return from_guest_phys(addr); } /* A macro which transparently hands the line number to the real function. */ -#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) +#define check_pointer(d,addr,size) _check_pointer(d, addr, size, __LINE__) /* * Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, or vq->vring.num if we're * at the end. */ -static unsigned next_desc(struct vring_desc *desc, +static unsigned next_desc(struct device *d, struct vring_desc *desc, unsigned int i, unsigned int max) { unsigned int next; @@ -602,7 +670,7 @@ static unsigned next_desc(struct vring_desc *desc, wmb(); if (next >= max) - errx(1, "Desc next is %u", next); + bad_driver(d, "Desc next is %u", next); return next; } @@ -613,21 +681,48 @@ static unsigned next_desc(struct vring_desc *desc, */ static void trigger_irq(struct virtqueue *vq) { - unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; + unsigned long buf[] = { LHREQ_IRQ, vq->dev->config.irq_line }; /* Don't inform them if nothing used. */ if (!vq->pending_used) return; vq->pending_used = 0; - /* If they don't want an interrupt, don't send one... */ + /* + * 2.4.7.1: + * + * If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: + * The driver MUST set flags to 0 or 1. + */ + if (vq->vring.avail->flags > 1) + bad_driver_vq(vq, "avail->flags = %u\n", vq->vring.avail->flags); + + /* + * 2.4.7.2: + * + * If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: + * + * - The device MUST ignore the used_event value. + * - After the device writes a descriptor index into the used ring: + * - If flags is 1, the device SHOULD NOT send an interrupt. + * - If flags is 0, the device MUST send an interrupt. + */ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { return; } + /* + * 4.1.4.5.1: + * + * If MSI-X capability is disabled, the device MUST set the Queue + * Interrupt bit in ISR status before sending a virtqueue notification + * to the driver. + */ + vq->dev->mmio->isr = 0x1; + /* Send the Guest an interrupt tell them we used something up. */ if (write(lguest_fd, buf, sizeof(buf)) != 0) - err(1, "Triggering irq %i", vq->config.irq); + err(1, "Triggering irq %i", vq->dev->config.irq_line); } /* @@ -646,6 +741,14 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct vring_desc *desc; u16 last_avail = lg_last_avail(vq); + /* + * 2.4.7.1: + * + * The driver MUST handle spurious interrupts from the device. + * + * That's why this is a while loop. + */ + /* There's nothing available? */ while (last_avail == vq->vring.avail->idx) { u64 event; @@ -679,8 +782,8 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. */ if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) - errx(1, "Guest moved used index from %u to %u", - last_avail, vq->vring.avail->idx); + bad_driver_vq(vq, "Guest moved used index from %u to %u", + last_avail, vq->vring.avail->idx); /* * Make sure we read the descriptor number *after* we read the ring @@ -697,7 +800,7 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, /* If their number is silly, that's a fatal mistake. */ if (head >= vq->vring.num) - errx(1, "Guest says index %u is available", head); + bad_driver_vq(vq, "Guest says index %u is available", head); /* When we start there are none of either input nor output. */ *out_num = *in_num = 0; @@ -712,24 +815,73 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, * that: no rmb() required. */ - /* - * If this is an indirect entry, then this buffer contains a descriptor - * table which we handle as if it's any normal descriptor chain. - */ - if (desc[i].flags & VRING_DESC_F_INDIRECT) { - if (desc[i].len % sizeof(struct vring_desc)) - errx(1, "Invalid size for indirect buffer table"); + do { + /* + * If this is an indirect entry, then this buffer contains a + * descriptor table which we handle as if it's any normal + * descriptor chain. + */ + if (desc[i].flags & VRING_DESC_F_INDIRECT) { + /* 2.4.5.3.1: + * + * The driver MUST NOT set the VIRTQ_DESC_F_INDIRECT + * flag unless the VIRTIO_F_INDIRECT_DESC feature was + * negotiated. + */ + if (!(vq->dev->features_accepted & + (1<<VIRTIO_RING_F_INDIRECT_DESC))) + bad_driver_vq(vq, "vq indirect not negotiated"); - max = desc[i].len / sizeof(struct vring_desc); - desc = check_pointer(desc[i].addr, desc[i].len); - i = 0; - } + /* + * 2.4.5.3.1: + * + * The driver MUST NOT set the VIRTQ_DESC_F_INDIRECT + * flag within an indirect descriptor (ie. only one + * table per descriptor). + */ + if (desc != vq->vring.desc) + bad_driver_vq(vq, "Indirect within indirect"); + + /* + * Proposed update VIRTIO-134 spells this out: + * + * A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT + * and VIRTQ_DESC_F_NEXT in flags. + */ + if (desc[i].flags & VRING_DESC_F_NEXT) + bad_driver_vq(vq, "indirect and next together"); + + if (desc[i].len % sizeof(struct vring_desc)) + bad_driver_vq(vq, + "Invalid size for indirect table"); + /* + * 2.4.5.3.2: + * + * The device MUST ignore the write-only flag + * (flags&VIRTQ_DESC_F_WRITE) in the descriptor that + * refers to an indirect table. + * + * We ignore it here: :) + */ + + max = desc[i].len / sizeof(struct vring_desc); + desc = check_pointer(vq->dev, desc[i].addr, desc[i].len); + i = 0; + + /* 2.4.5.3.1: + * + * A driver MUST NOT create a descriptor chain longer + * than the Queue Size of the device. + */ + if (max > vq->pci_config.queue_size) + bad_driver_vq(vq, + "indirect has too many entries"); + } - do { /* Grab the first descriptor, and check it's OK. */ iov[*out_num + *in_num].iov_len = desc[i].len; iov[*out_num + *in_num].iov_base - = check_pointer(desc[i].addr, desc[i].len); + = check_pointer(vq->dev, desc[i].addr, desc[i].len); /* If this is an input descriptor, increment that count. */ if (desc[i].flags & VRING_DESC_F_WRITE) (*in_num)++; @@ -739,14 +891,15 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, * to come before any input descriptors. */ if (*in_num) - errx(1, "Descriptor has out after in"); + bad_driver_vq(vq, + "Descriptor has out after in"); (*out_num)++; } /* If we've got too many, that implies a descriptor loop. */ if (*out_num + *in_num > max) - errx(1, "Looped descriptor"); - } while ((i = next_desc(desc, i, max)) != max); + bad_driver_vq(vq, "Looped descriptor"); + } while ((i = next_desc(vq->dev, desc, i, max)) != max); return head; } @@ -803,7 +956,7 @@ static void console_input(struct virtqueue *vq) /* Make sure there's a descriptor available. */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); if (out_num) - errx(1, "Output buffers in console in queue?"); + bad_driver_vq(vq, "Output buffers in console in queue?"); /* Read into it. This is where we usually wait. */ len = readv(STDIN_FILENO, iov, in_num); @@ -856,7 +1009,7 @@ static void console_output(struct virtqueue *vq) /* We usually wait in here, for the Guest to give us something. */ head = wait_for_vq_desc(vq, iov, &out, &in); if (in) - errx(1, "Input buffers in console output queue?"); + bad_driver_vq(vq, "Input buffers in console output queue?"); /* writev can return a partial write, so we loop here. */ while (!iov_empty(iov, out)) { @@ -865,7 +1018,7 @@ static void console_output(struct virtqueue *vq) warn("Write to stdout gave %i (%d)", len, errno); break; } - iov_consume(iov, out, NULL, len); + iov_consume(vq->dev, iov, out, NULL, len); } /* @@ -894,7 +1047,7 @@ static void net_output(struct virtqueue *vq) /* We usually wait in here for the Guest to give us a packet. */ head = wait_for_vq_desc(vq, iov, &out, &in); if (in) - errx(1, "Input buffers in net output queue?"); + bad_driver_vq(vq, "Input buffers in net output queue?"); /* * Send the whole thing through to /dev/net/tun. It expects the exact * same format: what a coincidence! @@ -942,7 +1095,7 @@ static void net_input(struct virtqueue *vq) */ head = wait_for_vq_desc(vq, iov, &out, &in); if (out) - errx(1, "Output buffers in net input queue?"); + bad_driver_vq(vq, "Output buffers in net input queue?"); /* * If it looks like we'll block reading from the tun device, send them @@ -986,6 +1139,12 @@ static void kill_launcher(int signal) kill(0, SIGTERM); } +static void reset_vq_pci_config(struct virtqueue *vq) +{ + vq->pci_config.queue_size = VIRTQUEUE_NUM; + vq->pci_config.queue_enable = 0; +} + static void reset_device(struct device *dev) { struct virtqueue *vq; @@ -993,53 +1152,705 @@ static void reset_device(struct device *dev) verbose("Resetting device %s\n", dev->name); /* Clear any features they've acked. */ - memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); + dev->features_accepted = 0; /* We're going to be explicitly killing threads, so ignore them. */ signal(SIGCHLD, SIG_IGN); - /* Zero out the virtqueues, get rid of their threads */ + /* + * 4.1.4.3.1: + * + * The device MUST present a 0 in queue_enable on reset. + * + * This means we set it here, and reset the saved ones in every vq. + */ + dev->mmio->cfg.queue_enable = 0; + + /* Get rid of the virtqueue threads */ for (vq = dev->vq; vq; vq = vq->next) { + vq->last_avail_idx = 0; + reset_vq_pci_config(vq); if (vq->thread != (pid_t)-1) { kill(vq->thread, SIGTERM); waitpid(vq->thread, NULL, 0); vq->thread = (pid_t)-1; } - memset(vq->vring.desc, 0, - vring_size(vq->config.num, LGUEST_VRING_ALIGN)); - lg_last_avail(vq) = 0; } dev->running = false; + dev->wrote_features_ok = false; /* Now we care if threads die. */ signal(SIGCHLD, (void *)kill_launcher); } +static void cleanup_devices(void) +{ + unsigned int i; + + for (i = 1; i < MAX_PCI_DEVICES; i++) { + struct device *d = devices.pci[i]; + if (!d) + continue; + reset_device(d); + } + + /* If we saved off the original terminal settings, restore them now. */ + if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) + tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); +} + +/*L:217 + * We do PCI. This is mainly done to let us test the kernel virtio PCI + * code. + */ + +/* Linux expects a PCI host bridge: ours is a dummy, and first on the bus. */ +static struct device pci_host_bridge; + +static void init_pci_host_bridge(void) +{ + pci_host_bridge.name = "PCI Host Bridge"; + pci_host_bridge.config.class = 0x06; /* bridge */ + pci_host_bridge.config.subclass = 0; /* host bridge */ + devices.pci[0] = &pci_host_bridge; +} + +/* The IO ports used to read the PCI config space. */ +#define PCI_CONFIG_ADDR 0xCF8 +#define PCI_CONFIG_DATA 0xCFC + +/* + * Not really portable, but does help readability: this is what the Guest + * writes to the PCI_CONFIG_ADDR IO port. + */ +union pci_config_addr { + struct { + unsigned mbz: 2; + unsigned offset: 6; + unsigned funcnum: 3; + unsigned devnum: 5; + unsigned busnum: 8; + unsigned reserved: 7; + unsigned enabled : 1; + } bits; + u32 val; +}; + +/* + * We cache what they wrote to the address port, so we know what they're + * talking about when they access the data port. + */ +static union pci_config_addr pci_config_addr; + +static struct device *find_pci_device(unsigned int index) +{ + return devices.pci[index]; +} + +/* PCI can do 1, 2 and 4 byte reads; we handle that here. */ +static void ioread(u16 off, u32 v, u32 mask, u32 *val) +{ + assert(off < 4); + assert(mask == 0xFF || mask == 0xFFFF || mask == 0xFFFFFFFF); + *val = (v >> (off * 8)) & mask; +} + +/* PCI can do 1, 2 and 4 byte writes; we handle that here. */ +static void iowrite(u16 off, u32 v, u32 mask, u32 *dst) +{ + assert(off < 4); + assert(mask == 0xFF || mask == 0xFFFF || mask == 0xFFFFFFFF); + *dst &= ~(mask << (off * 8)); + *dst |= (v & mask) << (off * 8); +} + +/* + * Where PCI_CONFIG_DATA accesses depends on the previous write to + * PCI_CONFIG_ADDR. + */ +static struct device *dev_and_reg(u32 *reg) +{ + if (!pci_config_addr.bits.enabled) + return NULL; + + if (pci_config_addr.bits.funcnum != 0) + return NULL; + + if (pci_config_addr.bits.busnum != 0) + return NULL; + + if (pci_config_addr.bits.offset * 4 >= sizeof(struct pci_config)) + return NULL; + + *reg = pci_config_addr.bits.offset; + return find_pci_device(pci_config_addr.bits.devnum); +} + +/* + * We can get invalid combinations of values while they're writing, so we + * only fault if they try to write with some invalid bar/offset/length. + */ +static bool valid_bar_access(struct device *d, + struct virtio_pci_cfg_cap *cfg_access) +{ + /* We only have 1 bar (BAR0) */ + if (cfg_access->cap.bar != 0) + return false; + + /* Check it's within BAR0. */ + if (cfg_access->cap.offset >= d->mmio_size + || cfg_access->cap.offset + cfg_access->cap.length > d->mmio_size) + return false; + + /* Check length is 1, 2 or 4. */ + if (cfg_access->cap.length != 1 + && cfg_access->cap.length != 2 + && cfg_access->cap.length != 4) + return false; + + /* + * 4.1.4.7.2: + * + * The driver MUST NOT write a cap.offset which is not a multiple of + * cap.length (ie. all accesses MUST be aligned). + */ + if (cfg_access->cap.offset % cfg_access->cap.length != 0) + return false; + + /* Return pointer into word in BAR0. */ + return true; +} + +/* Is this accessing the PCI config address port?. */ +static bool is_pci_addr_port(u16 port) +{ + return port >= PCI_CONFIG_ADDR && port < PCI_CONFIG_ADDR + 4; +} + +static bool pci_addr_iowrite(u16 port, u32 mask, u32 val) +{ + iowrite(port - PCI_CONFIG_ADDR, val, mask, + &pci_config_addr.val); + verbose("PCI%s: %#x/%x: bus %u dev %u func %u reg %u\n", + pci_config_addr.bits.enabled ? "" : " DISABLED", + val, mask, + pci_config_addr.bits.busnum, + pci_config_addr.bits.devnum, + pci_config_addr.bits.funcnum, + pci_config_addr.bits.offset); + return true; +} + +static void pci_addr_ioread(u16 port, u32 mask, u32 *val) +{ + ioread(port - PCI_CONFIG_ADDR, pci_config_addr.val, mask, val); +} + +/* Is this accessing the PCI config data port?. */ +static bool is_pci_data_port(u16 port) +{ + return port >= PCI_CONFIG_DATA && port < PCI_CONFIG_DATA + 4; +} + +static void emulate_mmio_write(struct device *d, u32 off, u32 val, u32 mask); + +static bool pci_data_iowrite(u16 port, u32 mask, u32 val) +{ + u32 reg, portoff; + struct device *d = dev_and_reg(®); + + /* Complain if they don't belong to a device. */ + if (!d) + return false; + + /* They can do 1 byte writes, etc. */ + portoff = port - PCI_CONFIG_DATA; + + /* + * PCI uses a weird way to determine the BAR size: the OS + * writes all 1's, and sees which ones stick. + */ + if (&d->config_words[reg] == &d->config.bar[0]) { + int i; + + iowrite(portoff, val, mask, &d->config.bar[0]); + for (i = 0; (1 << i) < d->mmio_size; i++) + d->config.bar[0] &= ~(1 << i); + return true; + } else if ((&d->config_words[reg] > &d->config.bar[0] + && &d->config_words[reg] <= &d->config.bar[6]) + || &d->config_words[reg] == &d->config.expansion_rom_addr) { + /* Allow writing to any other BAR, or expansion ROM */ + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + /* We let them overide latency timer and cacheline size */ + } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) { + /* Only let them change the first two fields. */ + if (mask == 0xFFFFFFFF) + mask = 0xFFFF; + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + } else if (&d->config_words[reg] == (void *)&d->config.command + && mask == 0xFFFF) { + /* Ignore command writes. */ + return true; + } else if (&d->config_words[reg] + == (void *)&d->config.cfg_access.cap.bar + || &d->config_words[reg] + == &d->config.cfg_access.cap.length + || &d->config_words[reg] + == &d->config.cfg_access.cap.offset) { + + /* + * The VIRTIO_PCI_CAP_PCI_CFG capability + * provides a backdoor to access the MMIO + * regions without mapping them. Weird, but + * useful. + */ + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + } else if (&d->config_words[reg] == &d->config.cfg_access.pci_cfg_data) { + u32 write_mask; + + /* + * 4.1.4.7.1: + * + * Upon detecting driver write access to pci_cfg_data, the + * device MUST execute a write access at offset cap.offset at + * BAR selected by cap.bar using the first cap.length bytes + * from pci_cfg_data. + */ + + /* Must be bar 0 */ + if (!valid_bar_access(d, &d->config.cfg_access)) + return false; + + iowrite(portoff, val, mask, &d->config.cfg_access.pci_cfg_data); + + /* + * Now emulate a write. The mask we use is set by + * len, *not* this write! + */ + write_mask = (1ULL<<(8*d->config.cfg_access.cap.length)) - 1; + verbose("Window writing %#x/%#x to bar %u, offset %u len %u\n", + d->config.cfg_access.pci_cfg_data, write_mask, + d->config.cfg_access.cap.bar, + d->config.cfg_access.cap.offset, + d->config.cfg_access.cap.length); + + emulate_mmio_write(d, d->config.cfg_access.cap.offset, + d->config.cfg_access.pci_cfg_data, + write_mask); + return true; + } + + /* + * 4.1.4.1: + * + * The driver MUST NOT write into any field of the capability + * structure, with the exception of those with cap_type + * VIRTIO_PCI_CAP_PCI_CFG... + */ + return false; +} + +static u32 emulate_mmio_read(struct device *d, u32 off, u32 mask); + +static void pci_data_ioread(u16 port, u32 mask, u32 *val) +{ + u32 reg; + struct device *d = dev_and_reg(®); + + if (!d) + return; + + /* Read through the PCI MMIO access window is special */ + if (&d->config_words[reg] == &d->config.cfg_access.pci_cfg_data) { + u32 read_mask; + + /* + * 4.1.4.7.1: + * + * Upon detecting driver read access to pci_cfg_data, the + * device MUST execute a read access of length cap.length at + * offset cap.offset at BAR selected by cap.bar and store the + * first cap.length bytes in pci_cfg_data. + */ + /* Must be bar 0 */ + if (!valid_bar_access(d, &d->config.cfg_access)) + bad_driver(d, + "Invalid cfg_access to bar%u, offset %u len %u", + d->config.cfg_access.cap.bar, + d->config.cfg_access.cap.offset, + d->config.cfg_access.cap.length); + + /* + * Read into the window. The mask we use is set by + * len, *not* this read! + */ + read_mask = (1ULL<<(8*d->config.cfg_access.cap.length))-1; + d->config.cfg_access.pci_cfg_data + = emulate_mmio_read(d, + d->config.cfg_access.cap.offset, + read_mask); + verbose("Window read %#x/%#x from bar %u, offset %u len %u\n", + d->config.cfg_access.pci_cfg_data, read_mask, + d->config.cfg_access.cap.bar, + d->config.cfg_access.cap.offset, + d->config.cfg_access.cap.length); + } + ioread(port - PCI_CONFIG_DATA, d->config_words[reg], mask, val); +} + /*L:216 - * This actually creates the thread which services the virtqueue for a device. + * This is where we emulate a handful of Guest instructions. It's ugly + * and we used to do it in the kernel but it grew over time. + */ + +/* + * We use the ptrace syscall's pt_regs struct to talk about registers + * to lguest: these macros convert the names to the offsets. + */ +#define getreg(name) getreg_off(offsetof(struct user_regs_struct, name)) +#define setreg(name, val) \ + setreg_off(offsetof(struct user_regs_struct, name), (val)) + +static u32 getreg_off(size_t offset) +{ + u32 r; + unsigned long args[] = { LHREQ_GETREG, offset }; + + if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0) + err(1, "Getting register %u", offset); + if (pread(lguest_fd, &r, sizeof(r), cpu_id) != sizeof(r)) + err(1, "Reading register %u", offset); + + return r; +} + +static void setreg_off(size_t offset, u32 val) +{ + unsigned long args[] = { LHREQ_SETREG, offset, val }; + + if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0) + err(1, "Setting register %u", offset); +} + +/* Get register by instruction encoding */ +static u32 getreg_num(unsigned regnum, u32 mask) +{ + /* 8 bit ops use regnums 4-7 for high parts of word */ + if (mask == 0xFF && (regnum & 0x4)) + return getreg_num(regnum & 0x3, 0xFFFF) >> 8; + + switch (regnum) { + case 0: return getreg(eax) & mask; + case 1: return getreg(ecx) & mask; + case 2: return getreg(edx) & mask; + case 3: return getreg(ebx) & mask; + case 4: return getreg(esp) & mask; + case 5: return getreg(ebp) & mask; + case 6: return getreg(esi) & mask; + case 7: return getreg(edi) & mask; + } + abort(); +} + +/* Set register by instruction encoding */ +static void setreg_num(unsigned regnum, u32 val, u32 mask) +{ + /* Don't try to set bits out of range */ + assert(~(val & ~mask)); + + /* 8 bit ops use regnums 4-7 for high parts of word */ + if (mask == 0xFF && (regnum & 0x4)) { + /* Construct the 16 bits we want. */ + val = (val << 8) | getreg_num(regnum & 0x3, 0xFF); + setreg_num(regnum & 0x3, val, 0xFFFF); + return; + } + + switch (regnum) { + case 0: setreg(eax, val | (getreg(eax) & ~mask)); return; + case 1: setreg(ecx, val | (getreg(ecx) & ~mask)); return; + case 2: setreg(edx, val | (getreg(edx) & ~mask)); return; + case 3: setreg(ebx, val | (getreg(ebx) & ~mask)); return; + case 4: setreg(esp, val | (getreg(esp) & ~mask)); return; + case 5: setreg(ebp, val | (getreg(ebp) & ~mask)); return; + case 6: setreg(esi, val | (getreg(esi) & ~mask)); return; + case 7: setreg(edi, val | (getreg(edi) & ~mask)); return; + } + abort(); +} + +/* Get bytes of displacement appended to instruction, from r/m encoding */ +static u32 insn_displacement_len(u8 mod_reg_rm) +{ + /* Switch on the mod bits */ + switch (mod_reg_rm >> 6) { + case 0: + /* If mod == 0, and r/m == 101, 16-bit displacement follows */ + if ((mod_reg_rm & 0x7) == 0x5) + return 2; + /* Normally, mod == 0 means no literal displacement */ + return 0; + case 1: + /* One byte displacement */ + return 1; + case 2: + /* Four byte displacement */ + return 4; + case 3: + /* Register mode */ + return 0; + } + abort(); +} + +static void emulate_insn(const u8 insn[]) +{ + unsigned long args[] = { LHREQ_TRAP, 13 }; + unsigned int insnlen = 0, in = 0, small_operand = 0, byte_access; + unsigned int eax, port, mask; + /* + * Default is to return all-ones on IO port reads, which traditionally + * means "there's nothing there". + */ + u32 val = 0xFFFFFFFF; + + /* + * This must be the Guest kernel trying to do something, not userspace! + * The bottom two bits of the CS segment register are the privilege + * level. + */ + if ((getreg(xcs) & 3) != 0x1) + goto no_emulate; + + /* Decoding x86 instructions is icky. */ + + /* + * Around 2.6.33, the kernel started using an emulation for the + * cmpxchg8b instruction in early boot on many configurations. This + * code isn't paravirtualized, and it tries to disable interrupts. + * Ignore it, which will Mostly Work. + */ + if (insn[insnlen] == 0xfa) { + /* "cli", or Clear Interrupt Enable instruction. Skip it. */ + insnlen = 1; + goto skip_insn; + } + + /* + * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. + */ + if (insn[insnlen] == 0x66) { + small_operand = 1; + /* The instruction is 1 byte so far, read the next byte. */ + insnlen = 1; + } + + /* If the lower bit isn't set, it's a single byte access */ + byte_access = !(insn[insnlen] & 1); + + /* + * Now we can ignore the lower bit and decode the 4 opcodes + * we need to emulate. + */ + switch (insn[insnlen] & 0xFE) { + case 0xE4: /* in <next byte>,%al */ + port = insn[insnlen+1]; + insnlen += 2; + in = 1; + break; + case 0xEC: /* in (%dx),%al */ + port = getreg(edx) & 0xFFFF; + insnlen += 1; + in = 1; + break; + case 0xE6: /* out %al,<next byte> */ + port = insn[insnlen+1]; + insnlen += 2; + break; + case 0xEE: /* out %al,(%dx) */ + port = getreg(edx) & 0xFFFF; + insnlen += 1; + break; + default: + /* OK, we don't know what this is, can't emulate. */ + goto no_emulate; + } + + /* Set a mask of the 1, 2 or 4 bytes, depending on size of IO */ + if (byte_access) + mask = 0xFF; + else if (small_operand) + mask = 0xFFFF; + else + mask = 0xFFFFFFFF; + + /* + * If it was an "IN" instruction, they expect the result to be read + * into %eax, so we change %eax. + */ + eax = getreg(eax); + + if (in) { + /* This is the PS/2 keyboard status; 1 means ready for output */ + if (port == 0x64) + val = 1; + else if (is_pci_addr_port(port)) + pci_addr_ioread(port, mask, &val); + else if (is_pci_data_port(port)) + pci_data_ioread(port, mask, &val); + + /* Clear the bits we're about to read */ + eax &= ~mask; + /* Copy bits in from val. */ + eax |= val & mask; + /* Now update the register. */ + setreg(eax, eax); + } else { + if (is_pci_addr_port(port)) { + if (!pci_addr_iowrite(port, mask, eax)) + goto bad_io; + } else if (is_pci_data_port(port)) { + if (!pci_data_iowrite(port, mask, eax)) + goto bad_io; + } + /* There are many other ports, eg. CMOS clock, serial + * and parallel ports, so we ignore them all. */ + } + + verbose("IO %s of %x to %u: %#08x\n", + in ? "IN" : "OUT", mask, port, eax); +skip_insn: + /* Finally, we've "done" the instruction, so move past it. */ + setreg(eip, getreg(eip) + insnlen); + return; + +bad_io: + warnx("Attempt to %s port %u (%#x mask)", + in ? "read from" : "write to", port, mask); + +no_emulate: + /* Inject trap into Guest. */ + if (write(lguest_fd, args, sizeof(args)) < 0) + err(1, "Reinjecting trap 13 for fault at %#x", getreg(eip)); +} + +static struct device *find_mmio_region(unsigned long paddr, u32 *off) +{ + unsigned int i; + + for (i = 1; i < MAX_PCI_DEVICES; i++) { + struct device *d = devices.pci[i]; + + if (!d) + continue; + if (paddr < d->mmio_addr) + continue; + if (paddr >= d->mmio_addr + d->mmio_size) + continue; + *off = paddr - d->mmio_addr; + return d; + } + return NULL; +} + +/* FIXME: Use vq array. */ +static struct virtqueue *vq_by_num(struct device *d, u32 num) +{ + struct virtqueue *vq = d->vq; + + while (num-- && vq) + vq = vq->next; + + return vq; +} + +static void save_vq_config(const struct virtio_pci_common_cfg *cfg, + struct virtqueue *vq) +{ + vq->pci_config = *cfg; +} + +static void restore_vq_config(struct virtio_pci_common_cfg *cfg, + struct virtqueue *vq) +{ + /* Only restore the per-vq part */ + size_t off = offsetof(struct virtio_pci_common_cfg, queue_size); + + memcpy((void *)cfg + off, (void *)&vq->pci_config + off, + sizeof(*cfg) - off); +} + +/* + * 4.1.4.3.2: + * + * The driver MUST configure the other virtqueue fields before + * enabling the virtqueue with queue_enable. + * + * When they enable the virtqueue, we check that their setup is valid. */ -static void create_thread(struct virtqueue *vq) +static void check_virtqueue(struct device *d, struct virtqueue *vq) +{ + /* Because lguest is 32 bit, all the descriptor high bits must be 0 */ + if (vq->pci_config.queue_desc_hi + || vq->pci_config.queue_avail_hi + || vq->pci_config.queue_used_hi) + bad_driver_vq(vq, "invalid 64-bit queue address"); + + /* + * 2.4.1: + * + * The driver MUST ensure that the physical address of the first byte + * of each virtqueue part is a multiple of the specified alignment + * value in the above table. + */ + if (vq->pci_config.queue_desc_lo % 16 + || vq->pci_config.queue_avail_lo % 2 + || vq->pci_config.queue_used_lo % 4) + bad_driver_vq(vq, "invalid alignment in queue addresses"); + + /* Initialize the virtqueue and check they're all in range. */ + vq->vring.num = vq->pci_config.queue_size; + vq->vring.desc = check_pointer(vq->dev, + vq->pci_config.queue_desc_lo, + sizeof(*vq->vring.desc) * vq->vring.num); + vq->vring.avail = check_pointer(vq->dev, + vq->pci_config.queue_avail_lo, + sizeof(*vq->vring.avail) + + (sizeof(vq->vring.avail->ring[0]) + * vq->vring.num)); + vq->vring.used = check_pointer(vq->dev, + vq->pci_config.queue_used_lo, + sizeof(*vq->vring.used) + + (sizeof(vq->vring.used->ring[0]) + * vq->vring.num)); + + /* + * 2.4.9.1: + * + * The driver MUST initialize flags in the used ring to 0 + * when allocating the used ring. + */ + if (vq->vring.used->flags != 0) + bad_driver_vq(vq, "invalid initial used.flags %#x", + vq->vring.used->flags); +} + +static void start_virtqueue(struct virtqueue *vq) { /* * Create stack for thread. Since the stack grows upwards, we point * the stack pointer to the end of this region. */ char *stack = malloc(32768); - unsigned long args[] = { LHREQ_EVENTFD, - vq->config.pfn*getpagesize(), 0 }; /* Create a zero-initialized eventfd. */ vq->eventfd = eventfd(0, 0); if (vq->eventfd < 0) err(1, "Creating eventfd"); - args[2] = vq->eventfd; - - /* - * Attach an eventfd to this virtqueue: it will go off when the Guest - * does an LHCALL_NOTIFY for this vq. - */ - if (write(lguest_fd, &args, sizeof(args)) != 0) - err(1, "Attaching eventfd"); /* * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so @@ -1048,167 +1859,531 @@ static void create_thread(struct virtqueue *vq) vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); if (vq->thread == (pid_t)-1) err(1, "Creating clone"); - - /* We close our local copy now the child has it. */ - close(vq->eventfd); } -static void start_device(struct device *dev) +static void start_virtqueues(struct device *d) { - unsigned int i; struct virtqueue *vq; - verbose("Device %s OK: offered", dev->name); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev)[i]); - verbose(", accepted"); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev) - [dev->feature_len+i]); - - for (vq = dev->vq; vq; vq = vq->next) { - if (vq->service) - create_thread(vq); + for (vq = d->vq; vq; vq = vq->next) { + if (vq->pci_config.queue_enable) + start_virtqueue(vq); } - dev->running = true; } -static void cleanup_devices(void) +static void emulate_mmio_write(struct device *d, u32 off, u32 val, u32 mask) { - struct device *dev; + struct virtqueue *vq; - for (dev = devices.dev; dev; dev = dev->next) - reset_device(dev); + switch (off) { + case offsetof(struct virtio_pci_mmio, cfg.device_feature_select): + /* + * 4.1.4.3.1: + * + * The device MUST present the feature bits it is offering in + * device_feature, starting at bit device_feature_select ∗ 32 + * for any device_feature_select written by the driver + */ + if (val == 0) + d->mmio->cfg.device_feature = d->features; + else if (val == 1) + d->mmio->cfg.device_feature = (d->features >> 32); + else + d->mmio->cfg.device_feature = 0; + goto feature_write_through32; + case offsetof(struct virtio_pci_mmio, cfg.guest_feature_select): + if (val > 1) + bad_driver(d, "Unexpected driver select %u", val); + goto feature_write_through32; + case offsetof(struct virtio_pci_mmio, cfg.guest_feature): + if (d->mmio->cfg.guest_feature_select == 0) { + d->features_accepted &= ~((u64)0xFFFFFFFF); + d->features_accepted |= val; + } else { + assert(d->mmio->cfg.guest_feature_select == 1); + d->features_accepted &= 0xFFFFFFFF; + d->features_accepted |= ((u64)val) << 32; + } + /* + * 2.2.1: + * + * The driver MUST NOT accept a feature which the device did + * not offer + */ + if (d->features_accepted & ~d->features) + bad_driver(d, "over-accepted features %#llx of %#llx", + d->features_accepted, d->features); + goto feature_write_through32; + case offsetof(struct virtio_pci_mmio, cfg.device_status): { + u8 prev; + + verbose("%s: device status -> %#x\n", d->name, val); + /* + * 4.1.4.3.1: + * + * The device MUST reset when 0 is written to device_status, + * and present a 0 in device_status once that is done. + */ + if (val == 0) { + reset_device(d); + goto write_through8; + } - /* If we saved off the original terminal settings, restore them now. */ - if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) - tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); -} + /* 2.1.1: The driver MUST NOT clear a device status bit. */ + if (d->mmio->cfg.device_status & ~val) + bad_driver(d, "unset of device status bit %#x -> %#x", + d->mmio->cfg.device_status, val); -/* When the Guest tells us they updated the status field, we handle it. */ -static void update_device_status(struct device *dev) -{ - /* A zero status is a reset, otherwise it's a set of flags. */ - if (dev->desc->status == 0) - reset_device(dev); - else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { - warnx("Device %s configuration FAILED", dev->name); - if (dev->running) - reset_device(dev); - } else { - if (dev->running) - err(1, "Device %s features finalized twice", dev->name); - start_device(dev); + /* + * 2.1.2: + * + * The device MUST NOT consume buffers or notify the driver + * before DRIVER_OK. + */ + if (val & VIRTIO_CONFIG_S_DRIVER_OK + && !(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER_OK)) + start_virtqueues(d); + + /* + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + * - Reset the device. + * - Set the ACKNOWLEDGE status bit: the guest OS has + * notice the device. + * - Set the DRIVER status bit: the guest OS knows how + * to drive the device. + * - Read device feature bits, and write the subset + * of feature bits understood by the OS and driver + * to the device. During this step the driver MAY + * read (but MUST NOT write) the device-specific + * configuration fields to check that it can + * support the device before accepting it. + * - Set the FEATURES_OK status bit. The driver + * MUST not accept new feature bits after this + * step. + * - Re-read device status to ensure the FEATURES_OK + * bit is still set: otherwise, the device does + * not support our subset of features and the + * device is unusable. + * - Perform device-specific setup, including + * discovery of virtqueues for the device, + * optional per-bus setup, reading and possibly + * writing the device’s virtio configuration + * space, and population of virtqueues. + * - Set the DRIVER_OK status bit. At this point the + * device is “live”. + */ + prev = 0; + switch (val & ~d->mmio->cfg.device_status) { + case VIRTIO_CONFIG_S_DRIVER_OK: + prev |= VIRTIO_CONFIG_S_FEATURES_OK; /* fall thru */ + case VIRTIO_CONFIG_S_FEATURES_OK: + prev |= VIRTIO_CONFIG_S_DRIVER; /* fall thru */ + case VIRTIO_CONFIG_S_DRIVER: + prev |= VIRTIO_CONFIG_S_ACKNOWLEDGE; /* fall thru */ + case VIRTIO_CONFIG_S_ACKNOWLEDGE: + break; + default: + bad_driver(d, "unknown device status bit %#x -> %#x", + d->mmio->cfg.device_status, val); + } + if (d->mmio->cfg.device_status != prev) + bad_driver(d, "unexpected status transition %#x -> %#x", + d->mmio->cfg.device_status, val); + + /* If they just wrote FEATURES_OK, we make sure they read */ + switch (val & ~d->mmio->cfg.device_status) { + case VIRTIO_CONFIG_S_FEATURES_OK: + d->wrote_features_ok = true; + break; + case VIRTIO_CONFIG_S_DRIVER_OK: + if (d->wrote_features_ok) + bad_driver(d, "did not re-read FEATURES_OK"); + break; + } + goto write_through8; } -} + case offsetof(struct virtio_pci_mmio, cfg.queue_select): + vq = vq_by_num(d, val); + /* + * 4.1.4.3.1: + * + * The device MUST present a 0 in queue_size if the virtqueue + * corresponding to the current queue_select is unavailable. + */ + if (!vq) { + d->mmio->cfg.queue_size = 0; + goto write_through16; + } + /* Save registers for old vq, if it was a valid vq */ + if (d->mmio->cfg.queue_size) + save_vq_config(&d->mmio->cfg, + vq_by_num(d, d->mmio->cfg.queue_select)); + /* Restore the registers for the queue they asked for */ + restore_vq_config(&d->mmio->cfg, vq); + goto write_through16; + case offsetof(struct virtio_pci_mmio, cfg.queue_size): + /* + * 4.1.4.3.2: + * + * The driver MUST NOT write a value which is not a power of 2 + * to queue_size. + */ + if (val & (val-1)) + bad_driver(d, "invalid queue size %u", val); + if (d->mmio->cfg.queue_enable) + bad_driver(d, "changing queue size on live device"); + goto write_through16; + case offsetof(struct virtio_pci_mmio, cfg.queue_msix_vector): + bad_driver(d, "attempt to set MSIX vector to %u", val); + case offsetof(struct virtio_pci_mmio, cfg.queue_enable): { + struct virtqueue *vq = vq_by_num(d, d->mmio->cfg.queue_select); -/*L:215 - * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In - * particular, it's used to notify us of device status changes during boot. - */ -static void handle_output(unsigned long addr) -{ - struct device *i; + /* + * 4.1.4.3.2: + * + * The driver MUST NOT write a 0 to queue_enable. + */ + if (val != 1) + bad_driver(d, "setting queue_enable to %u", val); - /* Check each device. */ - for (i = devices.dev; i; i = i->next) { - struct virtqueue *vq; + /* + * 3.1.1: + * + * 7. Perform device-specific setup, including discovery of + * virtqueues for the device, optional per-bus setup, + * reading and possibly writing the device’s virtio + * configuration space, and population of virtqueues. + * 8. Set the DRIVER_OK status bit. + * + * All our devices require all virtqueues to be enabled, so + * they should have done that before setting DRIVER_OK. + */ + if (d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER_OK) + bad_driver(d, "enabling vq after DRIVER_OK"); + d->mmio->cfg.queue_enable = val; + save_vq_config(&d->mmio->cfg, vq); + check_virtqueue(d, vq); + goto write_through16; + } + case offsetof(struct virtio_pci_mmio, cfg.queue_notify_off): + bad_driver(d, "attempt to write to queue_notify_off"); + case offsetof(struct virtio_pci_mmio, cfg.queue_desc_lo): + case offsetof(struct virtio_pci_mmio, cfg.queue_desc_hi): + case offsetof(struct virtio_pci_mmio, cfg.queue_avail_lo): + case offsetof(struct virtio_pci_mmio, cfg.queue_avail_hi): + case offsetof(struct virtio_pci_mmio, cfg.queue_used_lo): + case offsetof(struct virtio_pci_mmio, cfg.queue_used_hi): /* - * Notifications to device descriptors mean they updated the - * device status. + * 4.1.4.3.2: + * + * The driver MUST configure the other virtqueue fields before + * enabling the virtqueue with queue_enable. */ - if (from_guest_phys(addr) == i->desc) { - update_device_status(i); - return; - } + if (d->mmio->cfg.queue_enable) + bad_driver(d, "changing queue on live device"); + + /* + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + *... + * 5. Set the FEATURES_OK status bit. The driver MUST not + * accept new feature bits after this step. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_FEATURES_OK)) + bad_driver(d, "setting up vq before FEATURES_OK"); - /* Devices should not be used before features are finalized. */ - for (vq = i->vq; vq; vq = vq->next) { - if (addr != vq->config.pfn*getpagesize()) - continue; - errx(1, "Notification on %s before setup!", i->name); + /* + * 6. Re-read device status to ensure the FEATURES_OK bit is + * still set... + */ + if (d->wrote_features_ok) + bad_driver(d, "didn't re-read FEATURES_OK before setup"); + + goto write_through32; + case offsetof(struct virtio_pci_mmio, notify): + vq = vq_by_num(d, val); + if (!vq) + bad_driver(d, "Invalid vq notification on %u", val); + /* Notify the process handling this vq by adding 1 to eventfd */ + write(vq->eventfd, "\1\0\0\0\0\0\0\0", 8); + goto write_through16; + case offsetof(struct virtio_pci_mmio, isr): + bad_driver(d, "Unexpected write to isr"); + /* Weird corner case: write to emerg_wr of console */ + case sizeof(struct virtio_pci_mmio) + + offsetof(struct virtio_console_config, emerg_wr): + if (strcmp(d->name, "console") == 0) { + char c = val; + write(STDOUT_FILENO, &c, 1); + goto write_through32; } + /* Fall through... */ + default: + /* + * 4.1.4.3.2: + * + * The driver MUST NOT write to device_feature, num_queues, + * config_generation or queue_notify_off. + */ + bad_driver(d, "Unexpected write to offset %u", off); } +feature_write_through32: /* - * Early console write is done using notify on a nul-terminated string - * in Guest memory. It's also great for hacking debugging messages - * into a Guest. + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + *... + * - Set the DRIVER status bit: the guest OS knows how + * to drive the device. + * - Read device feature bits, and write the subset + * of feature bits understood by the OS and driver + * to the device. + *... + * - Set the FEATURES_OK status bit. The driver MUST not + * accept new feature bits after this step. */ - if (addr >= guest_limit) - errx(1, "Bad NOTIFY %#lx", addr); + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER)) + bad_driver(d, "feature write before VIRTIO_CONFIG_S_DRIVER"); + if (d->mmio->cfg.device_status & VIRTIO_CONFIG_S_FEATURES_OK) + bad_driver(d, "feature write after VIRTIO_CONFIG_S_FEATURES_OK"); - write(STDOUT_FILENO, from_guest_phys(addr), - strnlen(from_guest_phys(addr), guest_limit - addr)); + /* + * 4.1.3.1: + * + * The driver MUST access each field using the “natural” access + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses for + * 16-bit fields and 8-bit accesses for 8-bit fields. + */ +write_through32: + if (mask != 0xFFFFFFFF) { + bad_driver(d, "non-32-bit write to offset %u (%#x)", + off, getreg(eip)); + return; + } + memcpy((char *)d->mmio + off, &val, 4); + return; + +write_through16: + if (mask != 0xFFFF) + bad_driver(d, "non-16-bit write to offset %u (%#x)", + off, getreg(eip)); + memcpy((char *)d->mmio + off, &val, 2); + return; + +write_through8: + if (mask != 0xFF) + bad_driver(d, "non-8-bit write to offset %u (%#x)", + off, getreg(eip)); + memcpy((char *)d->mmio + off, &val, 1); + return; } -/*L:190 - * Device Setup - * - * All devices need a descriptor so the Guest knows it exists, and a "struct - * device" so the Launcher can keep track of it. We have common helper - * routines to allocate and manage them. - */ - -/* - * The layout of the device page is a "struct lguest_device_desc" followed by a - * number of virtqueue descriptors, then two sets of feature bits, then an - * array of configuration bytes. This routine returns the configuration - * pointer. - */ -static u8 *device_config(const struct device *dev) +static u32 emulate_mmio_read(struct device *d, u32 off, u32 mask) { - return (void *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig) - + dev->feature_len * 2; + u8 isr; + u32 val = 0; + + switch (off) { + case offsetof(struct virtio_pci_mmio, cfg.device_feature_select): + case offsetof(struct virtio_pci_mmio, cfg.device_feature): + case offsetof(struct virtio_pci_mmio, cfg.guest_feature_select): + case offsetof(struct virtio_pci_mmio, cfg.guest_feature): + /* + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + *... + * - Set the DRIVER status bit: the guest OS knows how + * to drive the device. + * - Read device feature bits, and write the subset + * of feature bits understood by the OS and driver + * to the device. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER)) + bad_driver(d, + "feature read before VIRTIO_CONFIG_S_DRIVER"); + goto read_through32; + case offsetof(struct virtio_pci_mmio, cfg.msix_config): + bad_driver(d, "read of msix_config"); + case offsetof(struct virtio_pci_mmio, cfg.num_queues): + goto read_through16; + case offsetof(struct virtio_pci_mmio, cfg.device_status): + /* As they did read, any write of FEATURES_OK is now fine. */ + d->wrote_features_ok = false; + goto read_through8; + case offsetof(struct virtio_pci_mmio, cfg.config_generation): + /* + * 4.1.4.3.1: + * + * The device MUST present a changed config_generation after + * the driver has read a device-specific configuration value + * which has changed since any part of the device-specific + * configuration was last read. + * + * This is simple: none of our devices change config, so this + * is always 0. + */ + goto read_through8; + case offsetof(struct virtio_pci_mmio, notify): + /* + * 3.1.1: + * + * The driver MUST NOT notify the device before setting + * DRIVER_OK. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER_OK)) + bad_driver(d, "notify before VIRTIO_CONFIG_S_DRIVER_OK"); + goto read_through16; + case offsetof(struct virtio_pci_mmio, isr): + if (mask != 0xFF) + bad_driver(d, "non-8-bit read from offset %u (%#x)", + off, getreg(eip)); + isr = d->mmio->isr; + /* + * 4.1.4.5.1: + * + * The device MUST reset ISR status to 0 on driver read. + */ + d->mmio->isr = 0; + return isr; + case offsetof(struct virtio_pci_mmio, padding): + bad_driver(d, "read from padding (%#x)", getreg(eip)); + default: + /* Read from device config space, beware unaligned overflow */ + if (off > d->mmio_size - 4) + bad_driver(d, "read past end (%#x)", getreg(eip)); + + /* + * 3.1.1: + * The driver MUST follow this sequence to initialize a device: + *... + * 3. Set the DRIVER status bit: the guest OS knows how to + * drive the device. + * 4. Read device feature bits, and write the subset of + * feature bits understood by the OS and driver to the + * device. During this step the driver MAY read (but MUST NOT + * write) the device-specific configuration fields to check + * that it can support the device before accepting it. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER)) + bad_driver(d, + "config read before VIRTIO_CONFIG_S_DRIVER"); + + if (mask == 0xFFFFFFFF) + goto read_through32; + else if (mask == 0xFFFF) + goto read_through16; + else + goto read_through8; + } + + /* + * 4.1.3.1: + * + * The driver MUST access each field using the “natural” access + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses for + * 16-bit fields and 8-bit accesses for 8-bit fields. + */ +read_through32: + if (mask != 0xFFFFFFFF) + bad_driver(d, "non-32-bit read to offset %u (%#x)", + off, getreg(eip)); + memcpy(&val, (char *)d->mmio + off, 4); + return val; + +read_through16: + if (mask != 0xFFFF) + bad_driver(d, "non-16-bit read to offset %u (%#x)", + off, getreg(eip)); + memcpy(&val, (char *)d->mmio + off, 2); + return val; + +read_through8: + if (mask != 0xFF) + bad_driver(d, "non-8-bit read to offset %u (%#x)", + off, getreg(eip)); + memcpy(&val, (char *)d->mmio + off, 1); + return val; } -/* - * This routine allocates a new "struct lguest_device_desc" from descriptor - * table page just above the Guest's normal memory. It returns a pointer to - * that descriptor. - */ -static struct lguest_device_desc *new_dev_desc(u16 type) +static void emulate_mmio(unsigned long paddr, const u8 *insn) { - struct lguest_device_desc d = { .type = type }; - void *p; + u32 val, off, mask = 0xFFFFFFFF, insnlen = 0; + struct device *d = find_mmio_region(paddr, &off); + unsigned long args[] = { LHREQ_TRAP, 14 }; - /* Figure out where the next device config is, based on the last one. */ - if (devices.lastdev) - p = device_config(devices.lastdev) - + devices.lastdev->desc->config_len; - else - p = devices.descpage; + if (!d) { + warnx("MMIO touching %#08lx (not a device)", paddr); + goto reinject; + } + + /* Prefix makes it a 16 bit op */ + if (insn[0] == 0x66) { + mask = 0xFFFF; + insnlen++; + } - /* We only have one page for all the descriptors. */ - if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) - errx(1, "Too many devices"); + /* iowrite */ + if (insn[insnlen] == 0x89) { + /* Next byte is r/m byte: bits 3-5 are register. */ + val = getreg_num((insn[insnlen+1] >> 3) & 0x7, mask); + emulate_mmio_write(d, off, val, mask); + insnlen += 2 + insn_displacement_len(insn[insnlen+1]); + } else if (insn[insnlen] == 0x8b) { /* ioread */ + /* Next byte is r/m byte: bits 3-5 are register. */ + val = emulate_mmio_read(d, off, mask); + setreg_num((insn[insnlen+1] >> 3) & 0x7, val, mask); + insnlen += 2 + insn_displacement_len(insn[insnlen+1]); + } else if (insn[0] == 0x88) { /* 8-bit iowrite */ + mask = 0xff; + /* Next byte is r/m byte: bits 3-5 are register. */ + val = getreg_num((insn[1] >> 3) & 0x7, mask); + emulate_mmio_write(d, off, val, mask); + insnlen = 2 + insn_displacement_len(insn[1]); + } else if (insn[0] == 0x8a) { /* 8-bit ioread */ + mask = 0xff; + val = emulate_mmio_read(d, off, mask); + setreg_num((insn[1] >> 3) & 0x7, val, mask); + insnlen = 2 + insn_displacement_len(insn[1]); + } else { + warnx("Unknown MMIO instruction touching %#08lx:" + " %02x %02x %02x %02x at %u", + paddr, insn[0], insn[1], insn[2], insn[3], getreg(eip)); + reinject: + /* Inject trap into Guest. */ + if (write(lguest_fd, args, sizeof(args)) < 0) + err(1, "Reinjecting trap 14 for fault at %#x", + getreg(eip)); + return; + } - /* p might not be aligned, so we memcpy in. */ - return memcpy(p, &d, sizeof(d)); + /* Finally, we've "done" the instruction, so move past it. */ + setreg(eip, getreg(eip) + insnlen); } -/* - * Each device descriptor is followed by the description of its virtqueues. We - * specify how many descriptors the virtqueue is to have. +/*L:190 + * Device Setup + * + * All devices need a descriptor so the Guest knows it exists, and a "struct + * device" so the Launcher can keep track of it. We have common helper + * routines to allocate and manage them. */ -static void add_virtqueue(struct device *dev, unsigned int num_descs, - void (*service)(struct virtqueue *)) +static void add_pci_virtqueue(struct device *dev, + void (*service)(struct virtqueue *), + const char *name) { - unsigned int pages; struct virtqueue **i, *vq = malloc(sizeof(*vq)); - void *p; - - /* First we need some memory for this virtqueue. */ - pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) - / getpagesize(); - p = get_pages(pages); /* Initialize the virtqueue */ vq->next = NULL; vq->last_avail_idx = 0; vq->dev = dev; + vq->name = name; /* * This is the routine the service thread will run, and its Process ID @@ -1218,25 +2393,11 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->thread = (pid_t)-1; /* Initialize the configuration. */ - vq->config.num = num_descs; - vq->config.irq = devices.next_irq++; - vq->config.pfn = to_guest_phys(p) / getpagesize(); - - /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); - - /* - * Append virtqueue to this device's descriptor. We use - * device_config() to get the end of the device's current virtqueues; - * we check that we haven't added any config or feature information - * yet, otherwise we'd be overwriting them. - */ - assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); - memcpy(device_config(dev), &vq->config, sizeof(vq->config)); - dev->num_vq++; - dev->desc->num_vq++; + reset_vq_pci_config(vq); + vq->pci_config.queue_notify_off = 0; - verbose("Virtqueue page %#lx\n", to_guest_phys(p)); + /* Add one to the number of queues */ + vq->dev->mmio->cfg.num_queues++; /* * Add to tail of list, so dev->vq is first vq, dev->vq->next is @@ -1246,73 +2407,239 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, *i = vq; } -/* - * The first half of the feature bitmask is for us to advertise features. The - * second half is for the Guest to accept features. - */ -static void add_feature(struct device *dev, unsigned bit) +/* The Guest accesses the feature bits via the PCI common config MMIO region */ +static void add_pci_feature(struct device *dev, unsigned bit) { - u8 *features = get_feature_bits(dev); + dev->features |= (1ULL << bit); +} - /* We can't extend the feature bits once we've added config bytes */ - if (dev->desc->feature_len <= bit / CHAR_BIT) { - assert(dev->desc->config_len == 0); - dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; - } +/* For devices with no config. */ +static void no_device_config(struct device *dev) +{ + dev->mmio_addr = get_mmio_region(dev->mmio_size); - features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); + dev->config.bar[0] = dev->mmio_addr; + /* Bottom 4 bits must be zero */ + assert(~(dev->config.bar[0] & 0xF)); +} + +/* This puts the device config into BAR0 */ +static void set_device_config(struct device *dev, const void *conf, size_t len) +{ + /* Set up BAR 0 */ + dev->mmio_size += len; + dev->mmio = realloc(dev->mmio, dev->mmio_size); + memcpy(dev->mmio + 1, conf, len); + + /* + * 4.1.4.6: + * + * The device MUST present at least one VIRTIO_PCI_CAP_DEVICE_CFG + * capability for any device type which has a device-specific + * configuration. + */ + /* Hook up device cfg */ + dev->config.cfg_access.cap.cap_next + = offsetof(struct pci_config, device); + + /* + * 4.1.4.6.1: + * + * The offset for the device-specific configuration MUST be 4-byte + * aligned. + */ + assert(dev->config.cfg_access.cap.cap_next % 4 == 0); + + /* Fix up device cfg field length. */ + dev->config.device.length = len; + + /* The rest is the same as the no-config case */ + no_device_config(dev); +} + +static void init_cap(struct virtio_pci_cap *cap, size_t caplen, int type, + size_t bar_offset, size_t bar_bytes, u8 next) +{ + cap->cap_vndr = PCI_CAP_ID_VNDR; + cap->cap_next = next; + cap->cap_len = caplen; + cap->cfg_type = type; + cap->bar = 0; + memset(cap->padding, 0, sizeof(cap->padding)); + cap->offset = bar_offset; + cap->length = bar_bytes; } /* - * This routine sets the configuration fields for an existing device's - * descriptor. It only works for the last device, but that's OK because that's - * how we use it. + * This sets up the pci_config structure, as defined in the virtio 1.0 + * standard (and PCI standard). */ -static void set_config(struct device *dev, unsigned len, const void *conf) +static void init_pci_config(struct pci_config *pci, u16 type, + u8 class, u8 subclass) { - /* Check we haven't overflowed our single page. */ - if (device_config(dev) + len > devices.descpage + getpagesize()) - errx(1, "Too many devices"); + size_t bar_offset, bar_len; + + /* + * 4.1.4.4.1: + * + * The device MUST either present notify_off_multiplier as an even + * power of 2, or present notify_off_multiplier as 0. + * + * 2.1.2: + * + * The device MUST initialize device status to 0 upon reset. + */ + memset(pci, 0, sizeof(*pci)); + + /* 4.1.2.1: Devices MUST have the PCI Vendor ID 0x1AF4 */ + pci->vendor_id = 0x1AF4; + /* 4.1.2.1: ... PCI Device ID calculated by adding 0x1040 ... */ + pci->device_id = 0x1040 + type; + + /* + * PCI have specific codes for different types of devices. + * Linux doesn't care, but it's a good clue for people looking + * at the device. + */ + pci->class = class; + pci->subclass = subclass; + + /* + * 4.1.2.1: + * + * Non-transitional devices SHOULD have a PCI Revision ID of 1 or + * higher + */ + pci->revid = 1; + + /* + * 4.1.2.1: + * + * Non-transitional devices SHOULD have a PCI Subsystem Device ID of + * 0x40 or higher. + */ + pci->subsystem_device_id = 0x40; + + /* We use our dummy interrupt controller, and irq_line is the irq */ + pci->irq_line = devices.next_irq++; + pci->irq_pin = 0; + + /* Support for extended capabilities. */ + pci->status = (1 << 4); + + /* Link them in. */ + /* + * 4.1.4.3.1: + * + * The device MUST present at least one common configuration + * capability. + */ + pci->capabilities = offsetof(struct pci_config, common); + + /* 4.1.4.3.1 ... offset MUST be 4-byte aligned. */ + assert(pci->capabilities % 4 == 0); + + bar_offset = offsetof(struct virtio_pci_mmio, cfg); + bar_len = sizeof(((struct virtio_pci_mmio *)0)->cfg); + init_cap(&pci->common, sizeof(pci->common), VIRTIO_PCI_CAP_COMMON_CFG, + bar_offset, bar_len, + offsetof(struct pci_config, notify)); + + /* + * 4.1.4.4.1: + * + * The device MUST present at least one notification capability. + */ + bar_offset += bar_len; + bar_len = sizeof(((struct virtio_pci_mmio *)0)->notify); + + /* + * 4.1.4.4.1: + * + * The cap.offset MUST be 2-byte aligned. + */ + assert(pci->common.cap_next % 2 == 0); + + /* FIXME: Use a non-zero notify_off, for per-queue notification? */ + /* + * 4.1.4.4.1: + * + * The value cap.length presented by the device MUST be at least 2 and + * MUST be large enough to support queue notification offsets for all + * supported queues in all possible configurations. + */ + assert(bar_len >= 2); + + init_cap(&pci->notify.cap, sizeof(pci->notify), + VIRTIO_PCI_CAP_NOTIFY_CFG, + bar_offset, bar_len, + offsetof(struct pci_config, isr)); + + bar_offset += bar_len; + bar_len = sizeof(((struct virtio_pci_mmio *)0)->isr); + /* + * 4.1.4.5.1: + * + * The device MUST present at least one VIRTIO_PCI_CAP_ISR_CFG + * capability. + */ + init_cap(&pci->isr, sizeof(pci->isr), + VIRTIO_PCI_CAP_ISR_CFG, + bar_offset, bar_len, + offsetof(struct pci_config, cfg_access)); + + /* + * 4.1.4.7.1: + * + * The device MUST present at least one VIRTIO_PCI_CAP_PCI_CFG + * capability. + */ + /* This doesn't have any presence in the BAR */ + init_cap(&pci->cfg_access.cap, sizeof(pci->cfg_access), + VIRTIO_PCI_CAP_PCI_CFG, + 0, 0, 0); - /* Copy in the config information, and store the length. */ - memcpy(device_config(dev), conf, len); - dev->desc->config_len = len; + bar_offset += bar_len + sizeof(((struct virtio_pci_mmio *)0)->padding); + assert(bar_offset == sizeof(struct virtio_pci_mmio)); - /* Size must fit in config_len field (8 bits)! */ - assert(dev->desc->config_len == len); + /* + * This gets sewn in and length set in set_device_config(). + * Some devices don't have a device configuration interface, so + * we never expose this if we don't call set_device_config(). + */ + init_cap(&pci->device, sizeof(pci->device), VIRTIO_PCI_CAP_DEVICE_CFG, + bar_offset, 0, 0); } /* - * This routine does all the creation and setup of a new device, including - * calling new_dev_desc() to allocate the descriptor and device memory. We - * don't actually start the service threads until later. + * This routine does all the creation and setup of a new device, but we don't + * actually place the MMIO region until we know the size (if any) of the + * device-specific config. And we don't actually start the service threads + * until later. * * See what I mean about userspace being boring? */ -static struct device *new_device(const char *name, u16 type) +static struct device *new_pci_device(const char *name, u16 type, + u8 class, u8 subclass) { struct device *dev = malloc(sizeof(*dev)); /* Now we populate the fields one at a time. */ - dev->desc = new_dev_desc(type); dev->name = name; dev->vq = NULL; - dev->feature_len = 0; - dev->num_vq = 0; dev->running = false; - dev->next = NULL; + dev->wrote_features_ok = false; + dev->mmio_size = sizeof(struct virtio_pci_mmio); + dev->mmio = calloc(1, dev->mmio_size); + dev->features = (u64)1 << VIRTIO_F_VERSION_1; + dev->features_accepted = 0; - /* - * Append to device list. Prepending to a single-linked list is - * easier, but the user expects the devices to be arranged on the bus - * in command-line order. The first network device on the command line - * is eth0, the first block device /dev/vda, etc. - */ - if (devices.lastdev) - devices.lastdev->next = dev; - else - devices.dev = dev; - devices.lastdev = dev; + if (devices.device_num + 1 >= MAX_PCI_DEVICES) + errx(1, "Can only handle 31 PCI devices"); + + init_pci_config(&dev->config, type, class, subclass); + assert(!devices.pci[devices.device_num+1]); + devices.pci[++devices.device_num] = dev; return dev; } @@ -1324,6 +2651,7 @@ static struct device *new_device(const char *name, u16 type) static void setup_console(void) { struct device *dev; + struct virtio_console_config conf; /* If we can save the initial standard input settings... */ if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { @@ -1336,7 +2664,7 @@ static void setup_console(void) tcsetattr(STDIN_FILENO, TCSANOW, &term); } - dev = new_device("console", VIRTIO_ID_CONSOLE); + dev = new_pci_device("console", VIRTIO_ID_CONSOLE, 0x07, 0x00); /* We store the console state in dev->priv, and initialize it. */ dev->priv = malloc(sizeof(struct console_abort)); @@ -1348,10 +2676,14 @@ static void setup_console(void) * stdin. When they put something in the output queue, we write it to * stdout. */ - add_virtqueue(dev, VIRTQUEUE_NUM, console_input); - add_virtqueue(dev, VIRTQUEUE_NUM, console_output); + add_pci_virtqueue(dev, console_input, "input"); + add_pci_virtqueue(dev, console_output, "output"); + + /* We need a configuration area for the emerg_wr early writes. */ + add_pci_feature(dev, VIRTIO_CONSOLE_F_EMERG_WRITE); + set_device_config(dev, &conf, sizeof(conf)); - verbose("device %u: console\n", ++devices.device_num); + verbose("device %u: console\n", devices.device_num); } /*:*/ @@ -1449,6 +2781,7 @@ static void configure_device(int fd, const char *tapif, u32 ipaddr) static int get_tun_device(char tapif[IFNAMSIZ]) { struct ifreq ifr; + int vnet_hdr_sz; int netfd; /* Start with this zeroed. Messy but sure. */ @@ -1476,6 +2809,18 @@ static int get_tun_device(char tapif[IFNAMSIZ]) */ ioctl(netfd, TUNSETNOCSUM, 1); + /* + * In virtio before 1.0 (aka legacy virtio), we added a 16-bit + * field at the end of the network header iff + * VIRTIO_NET_F_MRG_RXBUF was negotiated. For virtio 1.0, + * that became the norm, but we need to tell the tun device + * about our expanded header (which is called + * virtio_net_hdr_mrg_rxbuf in the legacy system). + */ + vnet_hdr_sz = sizeof(struct virtio_net_hdr_v1); + if (ioctl(netfd, TUNSETVNETHDRSZ, &vnet_hdr_sz) != 0) + err(1, "Setting tun header size to %u", vnet_hdr_sz); + memcpy(tapif, ifr.ifr_name, IFNAMSIZ); return netfd; } @@ -1499,12 +2844,12 @@ static void setup_tun_net(char *arg) net_info->tunfd = get_tun_device(tapif); /* First we create a new network device. */ - dev = new_device("net", VIRTIO_ID_NET); + dev = new_pci_device("net", VIRTIO_ID_NET, 0x02, 0x00); dev->priv = net_info; /* Network devices need a recv and a send queue, just like console. */ - add_virtqueue(dev, VIRTQUEUE_NUM, net_input); - add_virtqueue(dev, VIRTQUEUE_NUM, net_output); + add_pci_virtqueue(dev, net_input, "rx"); + add_pci_virtqueue(dev, net_output, "tx"); /* * We need a socket to perform the magic network ioctls to bring up the @@ -1524,7 +2869,7 @@ static void setup_tun_net(char *arg) p = strchr(arg, ':'); if (p) { str2mac(p+1, conf.mac); - add_feature(dev, VIRTIO_NET_F_MAC); + add_pci_feature(dev, VIRTIO_NET_F_MAC); *p = '\0'; } @@ -1538,25 +2883,21 @@ static void setup_tun_net(char *arg) configure_device(ipfd, tapif, ip); /* Expect Guest to handle everything except UFO */ - add_feature(dev, VIRTIO_NET_F_CSUM); - add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); - add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); - add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); - add_feature(dev, VIRTIO_NET_F_GUEST_ECN); - add_feature(dev, VIRTIO_NET_F_HOST_TSO4); - add_feature(dev, VIRTIO_NET_F_HOST_TSO6); - add_feature(dev, VIRTIO_NET_F_HOST_ECN); + add_pci_feature(dev, VIRTIO_NET_F_CSUM); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_CSUM); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_TSO4); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_TSO6); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_ECN); + add_pci_feature(dev, VIRTIO_NET_F_HOST_TSO4); + add_pci_feature(dev, VIRTIO_NET_F_HOST_TSO6); + add_pci_feature(dev, VIRTIO_NET_F_HOST_ECN); /* We handle indirect ring entries */ - add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); - /* We're compliant with the damn spec. */ - add_feature(dev, VIRTIO_F_ANY_LAYOUT); - set_config(dev, sizeof(conf), &conf); + add_pci_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); + set_device_config(dev, &conf, sizeof(conf)); /* We don't need the socket any more; setup is done. */ close(ipfd); - devices.device_num++; - if (bridging) verbose("device %u: tun %s attached to bridge: %s\n", devices.device_num, tapif, arg); @@ -1607,7 +2948,7 @@ static void blk_request(struct virtqueue *vq) head = wait_for_vq_desc(vq, iov, &out_num, &in_num); /* Copy the output header from the front of the iov (adjusts iov) */ - iov_consume(iov, out_num, &out, sizeof(out)); + iov_consume(vq->dev, iov, out_num, &out, sizeof(out)); /* Find and trim end of iov input array, for our status byte. */ in = NULL; @@ -1619,7 +2960,7 @@ static void blk_request(struct virtqueue *vq) } } if (!in) - errx(1, "Bad virtblk cmd with no room for status"); + bad_driver_vq(vq, "Bad virtblk cmd with no room for status"); /* * For historical reasons, block operations are expressed in 512 byte @@ -1627,15 +2968,7 @@ static void blk_request(struct virtqueue *vq) */ off = out.sector * 512; - /* - * In general the virtio block driver is allowed to try SCSI commands. - * It'd be nice if we supported eject, for example, but we don't. - */ - if (out.type & VIRTIO_BLK_T_SCSI_CMD) { - fprintf(stderr, "Scsi commands unsupported\n"); - *in = VIRTIO_BLK_S_UNSUPP; - wlen = sizeof(*in); - } else if (out.type & VIRTIO_BLK_T_OUT) { + if (out.type & VIRTIO_BLK_T_OUT) { /* * Write * @@ -1657,7 +2990,7 @@ static void blk_request(struct virtqueue *vq) /* Trim it back to the correct length */ ftruncate64(vblk->fd, vblk->len); /* Die, bad Guest, die. */ - errx(1, "Write past end %llu+%u", off, ret); + bad_driver_vq(vq, "Write past end %llu+%u", off, ret); } wlen = sizeof(*in); @@ -1699,11 +3032,11 @@ static void setup_block_file(const char *filename) struct vblk_info *vblk; struct virtio_blk_config conf; - /* Creat the device. */ - dev = new_device("block", VIRTIO_ID_BLOCK); + /* Create the device. */ + dev = new_pci_device("block", VIRTIO_ID_BLOCK, 0x01, 0x80); /* The device has one virtqueue, where the Guest places requests. */ - add_virtqueue(dev, VIRTQUEUE_NUM, blk_request); + add_pci_virtqueue(dev, blk_request, "request"); /* Allocate the room for our own bookkeeping */ vblk = dev->priv = malloc(sizeof(*vblk)); @@ -1712,9 +3045,6 @@ static void setup_block_file(const char *filename) vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); vblk->len = lseek64(vblk->fd, 0, SEEK_END); - /* We support FLUSH. */ - add_feature(dev, VIRTIO_BLK_F_FLUSH); - /* Tell Guest how many sectors this device has. */ conf.capacity = cpu_to_le64(vblk->len / 512); @@ -1722,20 +3052,19 @@ static void setup_block_file(const char *filename) * Tell Guest not to put in too many descriptors at once: two are used * for the in and out elements. */ - add_feature(dev, VIRTIO_BLK_F_SEG_MAX); + add_pci_feature(dev, VIRTIO_BLK_F_SEG_MAX); conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); - /* Don't try to put whole struct: we have 8 bit limit. */ - set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf); + set_device_config(dev, &conf, sizeof(struct virtio_blk_config)); verbose("device %u: virtblock %llu sectors\n", - ++devices.device_num, le64_to_cpu(conf.capacity)); + devices.device_num, le64_to_cpu(conf.capacity)); } /*L:211 - * Our random number generator device reads from /dev/random into the Guest's + * Our random number generator device reads from /dev/urandom into the Guest's * input buffers. The usual case is that the Guest doesn't want random numbers - * and so has no buffers although /dev/random is still readable, whereas + * and so has no buffers although /dev/urandom is still readable, whereas * console is the reverse. * * The same logic applies, however. @@ -1754,7 +3083,7 @@ static void rng_input(struct virtqueue *vq) /* First we need a buffer from the Guests's virtqueue. */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); if (out_num) - errx(1, "Output buffers in rng?"); + bad_driver_vq(vq, "Output buffers in rng?"); /* * Just like the console write, we loop to cover the whole iovec. @@ -1763,8 +3092,8 @@ static void rng_input(struct virtqueue *vq) while (!iov_empty(iov, in_num)) { len = readv(rng_info->rfd, iov, in_num); if (len <= 0) - err(1, "Read from /dev/random gave %i", len); - iov_consume(iov, in_num, NULL, len); + err(1, "Read from /dev/urandom gave %i", len); + iov_consume(vq->dev, iov, in_num, NULL, len); totlen += len; } @@ -1780,17 +3109,20 @@ static void setup_rng(void) struct device *dev; struct rng_info *rng_info = malloc(sizeof(*rng_info)); - /* Our device's privat info simply contains the /dev/random fd. */ - rng_info->rfd = open_or_die("/dev/random", O_RDONLY); + /* Our device's private info simply contains the /dev/urandom fd. */ + rng_info->rfd = open_or_die("/dev/urandom", O_RDONLY); /* Create the new device. */ - dev = new_device("rng", VIRTIO_ID_RNG); + dev = new_pci_device("rng", VIRTIO_ID_RNG, 0xff, 0); dev->priv = rng_info; /* The device has one virtqueue, where the Guest places inbufs. */ - add_virtqueue(dev, VIRTQUEUE_NUM, rng_input); + add_pci_virtqueue(dev, rng_input, "input"); - verbose("device %u: rng\n", devices.device_num++); + /* We don't have any configuration space */ + no_device_config(dev); + + verbose("device %u: rng\n", devices.device_num); } /* That's the end of device setup. */ @@ -1820,17 +3152,23 @@ static void __attribute__((noreturn)) restart_guest(void) static void __attribute__((noreturn)) run_guest(void) { for (;;) { - unsigned long notify_addr; + struct lguest_pending notify; int readval; /* We read from the /dev/lguest device to run the Guest. */ - readval = pread(lguest_fd, ¬ify_addr, - sizeof(notify_addr), cpu_id); - - /* One unsigned long means the Guest did HCALL_NOTIFY */ - if (readval == sizeof(notify_addr)) { - verbose("Notify on address %#lx\n", notify_addr); - handle_output(notify_addr); + readval = pread(lguest_fd, ¬ify, sizeof(notify), cpu_id); + if (readval == sizeof(notify)) { + if (notify.trap == 13) { + verbose("Emulating instruction at %#x\n", + getreg(eip)); + emulate_insn(notify.insn); + } else if (notify.trap == 14) { + verbose("Emulating MMIO at %#x\n", + getreg(eip)); + emulate_mmio(notify.addr, notify.insn); + } else + errx(1, "Unknown trap %i addr %#08x\n", + notify.trap, notify.addr); /* ENOENT means the Guest died. Reading tells us why. */ } else if (errno == ENOENT) { char reason[1024] = { 0 }; @@ -1893,11 +3231,9 @@ int main(int argc, char *argv[]) main_args = argv; /* - * First we initialize the device list. We keep a pointer to the last - * device, and the next interrupt number to use for devices (1: - * remember that 0 is used by the timer). + * First we initialize the device list. We remember next interrupt + * number to use for devices (1: remember that 0 is used by the timer). */ - devices.lastdev = NULL; devices.next_irq = 1; /* We're CPU 0. In fact, that's the only CPU possible right now. */ @@ -1921,12 +3257,14 @@ int main(int argc, char *argv[]) guest_base = map_zeroed_pages(mem / getpagesize() + DEVICE_PAGES); guest_limit = mem; - guest_max = mem + DEVICE_PAGES*getpagesize(); - devices.descpage = get_pages(1); + guest_max = guest_mmio = mem + DEVICE_PAGES*getpagesize(); break; } } + /* We always have a console device, and it's always device 1. */ + setup_console(); + /* The options are fairly straight-forward */ while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { switch (c) { @@ -1967,8 +3305,8 @@ int main(int argc, char *argv[]) verbose("Guest base is at %p\n", guest_base); - /* We always have a console device */ - setup_console(); + /* Initialize the (fake) PCI host bridge device. */ + init_pci_host_bridge(); /* Now we load the kernel */ start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); diff --git a/tools/lib/api/Build b/tools/lib/api/Build new file mode 100644 index 000000000000..3653965cf481 --- /dev/null +++ b/tools/lib/api/Build @@ -0,0 +1,2 @@ +libapi-y += fd/ +libapi-y += fs/ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 36c08b1f4afb..d8fe29fc19a4 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,49 +1,43 @@ include ../../scripts/Makefile.include include ../../perf/config/utilities.mak # QUIET_CLEAN +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar -# guard against environment variables -LIB_H= -LIB_OBJS= - -LIB_H += fs/debugfs.h -LIB_H += fs/fs.h -# See comment below about piggybacking... -LIB_H += fd/array.h - -LIB_OBJS += $(OUTPUT)fs/debugfs.o -LIB_OBJS += $(OUTPUT)fs/fs.o -# XXX piggybacking here, need to introduce libapikfd, or rename this -# to plain libapik.a and make it have it all api goodies -LIB_OBJS += $(OUTPUT)fd/array.o +MAKEFLAGS += --no-print-directory -LIBFILE = libapikfs.a +LIBFILE = $(OUTPUT)libapi.a -CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC -EXTLIBS = -lelf -lpthread -lrt -lm -ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ALL_LDFLAGS = $(LDFLAGS) +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -$(LIBFILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $(OUTPUT)$@ $(LIB_OBJS) +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +API_IN := $(OUTPUT)libapi-in.o -$(LIB_OBJS): $(LIB_H) +export srctree OUTPUT CC LD CFLAGS V -libapi_dirs: - $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs +all: $(LIBFILE) -$(OUTPUT)%.o: %.c libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< -$(OUTPUT)%.s: %.c libapi_dirs - $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< -$(OUTPUT)%.o: %.S libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< +$(API_IN): FORCE + @$(MAKE) $(build)=libapi + +$(LIBFILE): $(API_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) clean: - $(call QUIET_CLEAN, libapi) $(RM) $(LIB_OBJS) $(LIBFILE) + $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM) + +FORCE: -.PHONY: clean +.PHONY: clean FORCE diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build new file mode 100644 index 000000000000..605d99f6d71a --- /dev/null +++ b/tools/lib/api/fd/Build @@ -0,0 +1 @@ +libapi-y += array.o diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build new file mode 100644 index 000000000000..6de5a4f0b501 --- /dev/null +++ b/tools/lib/api/fs/Build @@ -0,0 +1,4 @@ +libapi-y += fs.o +libapi-y += debugfs.o +libapi-y += findfs.o +libapi-y += tracefs.o diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index d2b18e887071..8305b3e9d48e 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -3,75 +3,50 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #include <stdbool.h> #include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> #include <sys/mount.h> #include <linux/kernel.h> #include "debugfs.h" -char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; +#ifndef DEBUGFS_DEFAULT_PATH +#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" +#endif + +char debugfs_mountpoint[PATH_MAX + 1] = DEBUGFS_DEFAULT_PATH; static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", + DEBUGFS_DEFAULT_PATH, "/debug", 0, }; static bool debugfs_found; +bool debugfs_configured(void) +{ + return debugfs_find_mountpoint() != NULL; +} + /* find the path to the mounted debugfs */ const char *debugfs_find_mountpoint(void) { - const char * const *ptr; - char type[100]; - FILE *fp; + const char *ret; if (debugfs_found) return (const char *)debugfs_mountpoint; - ptr = debugfs_known_mountpoints; - while (*ptr) { - if (debugfs_valid_mountpoint(*ptr) == 0) { - debugfs_found = true; - strcpy(debugfs_mountpoint, *ptr); - return debugfs_mountpoint; - } - ptr++; - } - - /* give up and parse /proc/mounts */ - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - debugfs_mountpoint, type) == 2) { - if (strcmp(type, "debugfs") == 0) - break; - } - fclose(fp); + ret = find_mountpoint("debugfs", (long) DEBUGFS_MAGIC, + debugfs_mountpoint, PATH_MAX + 1, + debugfs_known_mountpoints); + if (ret) + debugfs_found = true; - if (strcmp(type, "debugfs") != 0) - return NULL; - - debugfs_found = true; - - return debugfs_mountpoint; -} - -/* verify that a mountpoint is actually a debugfs instance */ - -int debugfs_valid_mountpoint(const char *debugfs) -{ - struct statfs st_fs; - - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if ((long)st_fs.f_type != (long)DEBUGFS_MAGIC) - return -ENOENT; - - return 0; + return ret; } /* mount the debugfs somewhere if it's not mounted */ @@ -87,7 +62,7 @@ char *debugfs_mount(const char *mountpoint) mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); /* if no environment variable, use default */ if (mountpoint == NULL) - mountpoint = "/sys/kernel/debug"; + mountpoint = DEBUGFS_DEFAULT_PATH; } if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0) diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index 0739881a9897..455023698d2b 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -1,16 +1,7 @@ #ifndef __API_DEBUGFS_H__ #define __API_DEBUGFS_H__ -#define _STR(x) #x -#define STR(x) _STR(x) - -/* - * On most systems <limits.h> would have given us this, but not on some systems - * (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif +#include "findfs.h" #ifndef DEBUGFS_MAGIC #define DEBUGFS_MAGIC 0x64626720 @@ -20,8 +11,8 @@ #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" #endif +bool debugfs_configured(void); const char *debugfs_find_mountpoint(void); -int debugfs_valid_mountpoint(const char *debugfs); char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; diff --git a/tools/lib/api/fs/findfs.c b/tools/lib/api/fs/findfs.c new file mode 100644 index 000000000000..49946cb6d7af --- /dev/null +++ b/tools/lib/api/fs/findfs.c @@ -0,0 +1,63 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <sys/vfs.h> + +#include "findfs.h" + +/* verify that a mountpoint is actually the type we want */ + +int valid_mountpoint(const char *mount, long magic) +{ + struct statfs st_fs; + + if (statfs(mount, &st_fs) < 0) + return -ENOENT; + else if ((long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +/* find the path to a mounted file system */ +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints) +{ + const char * const *ptr; + char format[128]; + char type[100]; + FILE *fp; + + if (known_mountpoints) { + ptr = known_mountpoints; + while (*ptr) { + if (valid_mountpoint(*ptr, magic) == 0) { + strncpy(mountpoint, *ptr, len - 1); + mountpoint[len-1] = 0; + return mountpoint; + } + ptr++; + } + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + snprintf(format, 128, "%%*s %%%ds %%99s %%*s %%*d %%*d\n", len); + + while (fscanf(fp, format, mountpoint, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + fclose(fp); + + if (strcmp(type, fstype) != 0) + return NULL; + + return mountpoint; +} diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h new file mode 100644 index 000000000000..b6f5d05acc42 --- /dev/null +++ b/tools/lib/api/fs/findfs.h @@ -0,0 +1,23 @@ +#ifndef __API_FINDFS_H__ +#define __API_FINDFS_H__ + +#include <stdbool.h> + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * On most systems <limits.h> would have given us this, but not on some systems + * (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints); + +int valid_mountpoint(const char *mount, long magic); + +#endif /* __API_FINDFS_H__ */ diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c new file mode 100644 index 000000000000..e4aa9688b71e --- /dev/null +++ b/tools/lib/api/fs/tracefs.c @@ -0,0 +1,78 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <linux/kernel.h> + +#include "tracefs.h" + +#ifndef TRACEFS_DEFAULT_PATH +#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" +#endif + +char tracefs_mountpoint[PATH_MAX + 1] = TRACEFS_DEFAULT_PATH; + +static const char * const tracefs_known_mountpoints[] = { + TRACEFS_DEFAULT_PATH, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, +}; + +static bool tracefs_found; + +bool tracefs_configured(void) +{ + return tracefs_find_mountpoint() != NULL; +} + +/* find the path to the mounted tracefs */ +const char *tracefs_find_mountpoint(void) +{ + const char *ret; + + if (tracefs_found) + return (const char *)tracefs_mountpoint; + + ret = find_mountpoint("tracefs", (long) TRACEFS_MAGIC, + tracefs_mountpoint, PATH_MAX + 1, + tracefs_known_mountpoints); + + if (ret) + tracefs_found = true; + + return ret; +} + +/* mount the tracefs somewhere if it's not mounted */ +char *tracefs_mount(const char *mountpoint) +{ + /* see if it's already mounted */ + if (tracefs_find_mountpoint()) + goto out; + + /* if not mounted and no argument */ + if (mountpoint == NULL) { + /* see if environment variable set */ + mountpoint = getenv(PERF_TRACEFS_ENVIRONMENT); + /* if no environment variable, use default */ + if (mountpoint == NULL) + mountpoint = TRACEFS_DEFAULT_PATH; + } + + if (mount(NULL, mountpoint, "tracefs", 0, NULL) < 0) + return NULL; + + /* save the mountpoint */ + tracefs_found = true; + strncpy(tracefs_mountpoint, mountpoint, sizeof(tracefs_mountpoint)); +out: + return tracefs_mountpoint; +} diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h new file mode 100644 index 000000000000..da780ac49acb --- /dev/null +++ b/tools/lib/api/fs/tracefs.h @@ -0,0 +1,21 @@ +#ifndef __API_TRACEFS_H__ +#define __API_TRACEFS_H__ + +#include "findfs.h" + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#ifndef PERF_TRACEFS_ENVIRONMENT +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" +#endif + +bool tracefs_configured(void); +const char *tracefs_find_mountpoint(void); +int tracefs_valid_mountpoint(const char *debugfs); +char *tracefs_mount(const char *mountpoint); + +extern char tracefs_mountpoint[]; + +#endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build new file mode 100644 index 000000000000..6f667355b068 --- /dev/null +++ b/tools/lib/lockdep/Build @@ -0,0 +1 @@ +liblockdep-y += common.o lockdep.o preload.o rbtree.o diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 4b866c54f624..0c356fb65022 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -35,6 +35,10 @@ bindir = $(prefix)/$(bindir_relative) export DESTDIR DESTDIR_SQ INSTALL +MAKEFLAGS += --no-print-directory + +include ../../scripts/Makefile.include + # copy a bit from Linux kbuild ifeq ("$(origin V)", "command line") @@ -44,56 +48,21 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(BUILD_OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ - BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 -endef - -saved-output := $(BUILD_OUTPUT) -BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd) -$(if $(BUILD_OUTPUT),, \ - $(error output directory "$(saved-output)" does not exist)) - -all: sub-make - -gui: force - $(call build_output, all_cmd) - -$(filter-out gui,$(MAKECMDGOALS)): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # BUILD_OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))) -objtree := $(realpath $(CURDIR)) -src := $(srctree) -obj := $(objtree) - -export prefix libdir bindir src obj - # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) bindir_SQ = $(subst ','\'',$(bindir)) -LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION) +LIB_IN := $(OUTPUT)liblockdep-in.o + BIN_FILE = lockdep +LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION) CONFIG_INCLUDES = CONFIG_LIBS = @@ -108,33 +77,23 @@ INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g +CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) ifeq ($(VERBOSE),1) Q = - print_compile = - print_app_build = - print_fpic_compile = print_shared_lib_compile = print_install = else Q = @ - print_compile = echo ' CC '$(OBJ); - print_app_build = echo ' BUILD '$(OBJ); - print_fpic_compile = echo ' CC FPIC '$(OBJ); - print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_shared_lib_compile = echo ' LD '$(OBJ); + print_static_lib_build = echo ' LD '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif -do_fpic_compile = \ - ($(print_fpic_compile) \ - $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@) - -do_app_build = \ - ($(print_app_build) \ - $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS)) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj do_compile_shared_library = \ ($(print_shared_lib_compile) \ @@ -144,22 +103,6 @@ do_build_static_lib = \ ($(print_static_lib_build) \ $(RM) $@; $(AR) rcs $@ $^) - -define do_compile - $(print_compile) \ - $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; -endef - -$(obj)/%.o: $(src)/%.c - $(Q)$(call do_compile) - -%.o: $(src)/%.c - $(Q)$(call do_compile) - -PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o - -ALL_OBJS = $(PEVENT_LIB_OBJS) - CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) @@ -169,42 +112,15 @@ all: all_cmd all_cmd: $(CMD_TARGETS) -liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS) +$(LIB_IN): force + $(Q)$(MAKE) $(build)=liblockdep + +liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) $(Q)$(do_compile_shared_library) -liblockdep.a: $(PEVENT_LIB_OBJS) +liblockdep.a: $(LIB_IN) $(Q)$(do_build_static_lib) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c - $(Q)$(do_fpic_compile) - -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -233,8 +149,6 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d $(RM) tags TAGS -endif # skip-makefile - PHONY += force force: diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build new file mode 100644 index 000000000000..c681d0575d16 --- /dev/null +++ b/tools/lib/traceevent/Build @@ -0,0 +1,17 @@ +libtraceevent-y += event-parse.o +libtraceevent-y += event-plugin.o +libtraceevent-y += trace-seq.o +libtraceevent-y += parse-filter.o +libtraceevent-y += parse-utils.o +libtraceevent-y += kbuffer-parse.o + +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 005c9cc06935..d410da335e3d 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -67,7 +67,7 @@ PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' endif -include $(if $(BUILD_SRC),$(BUILD_SRC)/)../../scripts/Makefile.include +include ../../scripts/Makefile.include # copy a bit from Linux kbuild @@ -78,40 +78,13 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \ - BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1 -endef - -all: sub-make - -$(MAKECMDGOALS): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) -src := $(srctree) -obj := $(objtree) - export prefix bindir src obj # Shell quotes @@ -132,16 +105,19 @@ EXTRAVERSION = $(EP_EXTRAVERSION) OBJ = $@ N = -export Q VERBOSE - EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I $(srctree)/../../include $(CONFIG_INCLUDES) +INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) -# Set compile option CFLAGS if not set elsewhere -CFLAGS ?= -g -Wall +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif # Append required CFLAGS +override CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) override CFLAGS += $(udis86-flags) -D_GNU_SOURCE @@ -151,74 +127,58 @@ else Q = @ endif -do_compile_shared_library = \ - ($(print_shared_lib_compile) \ - $(CC) --shared $^ -o $@) - -do_plugin_build = \ - ($(print_plugin_build) \ - $(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<) - -do_build_static_lib = \ - ($(print_static_lib_build) \ - $(RM) $@; $(AR) rcs $@ $^) - - -do_compile = $(QUIET_CC)$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; +# Disable command line variables (CFLAGS) overide from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= -$(obj)/%.o: $(src)/%.c - $(call do_compile) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -%.o: $(src)/%.c - $(call do_compile) +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so -PEVENT_LIB_OBJS = event-parse.o -PEVENT_LIB_OBJS += event-plugin.o -PEVENT_LIB_OBJS += trace-seq.o -PEVENT_LIB_OBJS += parse-filter.o -PEVENT_LIB_OBJS += parse-utils.o -PEVENT_LIB_OBJS += kbuffer-parse.o +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) -PLUGIN_OBJS = plugin_jbd2.o -PLUGIN_OBJS += plugin_hrtimer.o -PLUGIN_OBJS += plugin_kmem.o -PLUGIN_OBJS += plugin_kvm.o -PLUGIN_OBJS += plugin_mac80211.o -PLUGIN_OBJS += plugin_sched_switch.o -PLUGIN_OBJS += plugin_function.o -PLUGIN_OBJS += plugin_xen.o -PLUGIN_OBJS += plugin_scsi.o -PLUGIN_OBJS += plugin_cfg80211.o - -PLUGINS := $(PLUGIN_OBJS:.o=.so) - -ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS) +TE_IN := $(OUTPUT)libtraceevent-in.o +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) CMD_TARGETS = $(LIB_FILE) $(PLUGINS) TARGETS = $(CMD_TARGETS) - all: all_cmd all_cmd: $(CMD_TARGETS) -libtraceevent.so: $(PEVENT_LIB_OBJS) +$(TE_IN): force + $(Q)$(MAKE) $(build)=libtraceevent + +$(OUTPUT)libtraceevent.so: $(TE_IN) $(QUIET_LINK)$(CC) --shared $^ -o $@ -libtraceevent.a: $(PEVENT_LIB_OBJS) +$(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ plugins: $(PLUGINS) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@ +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) -$(PLUGIN_OBJS): %.o : $(src)/%.c - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) -fPIC -o $@ $< +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) -$(PLUGINS): %.so: %.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $< +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^ define make_version.h (echo '/* This file is automatically generated. Do not modify. */'; \ @@ -255,40 +215,6 @@ define update_dir fi); endef -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - -TRACEEVENT-CFLAGS: force - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or cross compiler"; \ - echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ - fi - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -327,14 +253,9 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ $(RM) TRACEEVENT-CFLAGS tags TAGS -endif # skip-makefile - PHONY += force plugins force: -plugins: - @echo > /dev/null - # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 7a3873ff9a4f..5b4efc062320 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -22,6 +22,7 @@ #include <stdbool.h> #include <stdarg.h> +#include <stdio.h> #include <regex.h> #include <string.h> @@ -91,6 +92,7 @@ extern int trace_seq_putc(struct trace_seq *s, unsigned char c); extern void trace_seq_terminate(struct trace_seq *s); +extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); extern int trace_seq_do_printf(struct trace_seq *s); diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index ec3bd16a5488..292dc9f1d233 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -231,19 +231,24 @@ void trace_seq_terminate(struct trace_seq *s) s->buffer[s->len] = 0; } -int trace_seq_do_printf(struct trace_seq *s) +int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp) { TRACE_SEQ_CHECK(s); switch (s->state) { case TRACE_SEQ__GOOD: - return printf("%.*s", s->len, s->buffer); + return fprintf(fp, "%.*s", s->len, s->buffer); case TRACE_SEQ__BUFFER_POISONED: - puts("Usage of trace_seq after it was destroyed"); + fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed"); break; case TRACE_SEQ__MEM_ALLOC_FAILED: - puts("Can't allocate trace_seq buffer memory"); + fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory"); break; } return -1; } + +int trace_seq_do_printf(struct trace_seq *s) +{ + return trace_seq_do_fprintf(s, stdout); +} diff --git a/tools/perf/Build b/tools/perf/Build new file mode 100644 index 000000000000..b77370ef7005 --- /dev/null +++ b/tools/perf/Build @@ -0,0 +1,44 @@ +perf-y += builtin-bench.o +perf-y += builtin-annotate.o +perf-y += builtin-diff.o +perf-y += builtin-evlist.o +perf-y += builtin-help.o +perf-y += builtin-sched.o +perf-y += builtin-buildid-list.o +perf-y += builtin-buildid-cache.o +perf-y += builtin-list.o +perf-y += builtin-record.o +perf-y += builtin-report.o +perf-y += builtin-stat.o +perf-y += builtin-timechart.o +perf-y += builtin-top.o +perf-y += builtin-script.o +perf-y += builtin-kmem.o +perf-y += builtin-lock.o +perf-y += builtin-kvm.o +perf-y += builtin-inject.o +perf-y += builtin-mem.o +perf-y += builtin-data.o + +perf-$(CONFIG_AUDIT) += builtin-trace.o +perf-$(CONFIG_LIBELF) += builtin-probe.o + +perf-y += bench/ +perf-y += tests/ + +perf-y += perf.o + +paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" +paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))" +paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" + +CFLAGS_builtin-help.o += $(paths) +CFLAGS_builtin-timechart.o += $(paths) +CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE + +libperf-y += util/ +libperf-y += arch/ +libperf-y += ui/ +libperf-y += scripts/ + +gtk-y += ui/gtk/ diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt new file mode 100644 index 000000000000..f6fc6507ba55 --- /dev/null +++ b/tools/perf/Documentation/Build.txt @@ -0,0 +1,49 @@ + +1) perf build +============= +The perf build process consists of several separated building blocks, +which are linked together to form the perf binary: + - libperf library (static) + - perf builtin commands + - traceevent library (static) + - GTK ui library + +Several makefiles govern the perf build: + + - Makefile + top level Makefile working as a wrapper that calls the main + Makefile.perf with a -j option to do parallel builds. + + - Makefile.perf + main makefile that triggers build of all perf objects including + installation and documentation processing. + + - tools/build/Makefile.build + main makefile of the build framework + + - tools/build/Build.include + build framework generic definitions + + - Build makefiles + makefiles that defines build objects + +Please refer to tools/build/Documentation/Build.txt for more +information about build framework. + + +2) perf build +============= +The Makefile.perf triggers the build framework for build objects: + perf, libperf, gtk + +resulting in following objects: + $ ls *-in.o + gtk-in.o libperf-in.o perf-in.o + +Those objects are then used in final linking: + libperf-gtk.so <- gtk-in.o libperf-in.o + perf <- perf-in.o libperf-in.o + + +NOTE this description is omitting other libraries involved, only + focusing on build framework outcomes diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 0294c57b1f5e..dd07b55f58d8 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -12,9 +12,9 @@ SYNOPSIS DESCRIPTION ----------- -This command manages the build-id cache. It can add and remove files to/from -the cache. In the future it should as well purge older entries, set upper -limits for the space used by the cache, etc. +This command manages the build-id cache. It can add, remove, update and purge +files to/from the cache. In the future it should as well set upper limits for +the space used by the cache, etc. OPTIONS ------- @@ -36,14 +36,24 @@ OPTIONS actually made. -r:: --remove=:: - Remove specified file from the cache. + Remove a cached binary which has same build-id of specified file + from the cache. +-p:: +--purge=:: + Purge all cached binaries including older caches which have specified + path from the cache. -M:: --missing=:: List missing build ids in the cache for the specified file. -u:: ---update:: - Update specified file of the cache. It can be used to update kallsyms - kernel dso to vmlinux in order to support annotation. +--update=:: + Update specified file of the cache. Note that this doesn't remove + older entires since those may be still needed for annotating old + (or remote) perf.data. Only if there is already a cache which has + exactly same build-id, that is replaced by new one. It can be used + to update kallsyms and kernel dso to vmlinux in order to support + annotation. + -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt new file mode 100644 index 000000000000..be8fa1a0a97e --- /dev/null +++ b/tools/perf/Documentation/perf-data.txt @@ -0,0 +1,40 @@ +perf-data(1) +============== + +NAME +---- +perf-data - Data file related processing + +SYNOPSIS +-------- +[verse] +'perf data' [<common options>] <command> [<options>]", + +DESCRIPTION +----------- +Data file related processing. + +COMMANDS +-------- +convert:: + Converts perf data file into another format (only CTF [1] format is support by now). + It's possible to set data-convert debug variable to get debug messages from conversion, + like: + perf --debug data-convert data convert ... + +OPTIONS for 'convert' +--------------------- +--to-ctf:: + Triggers the CTF conversion, specify the path of CTF data directory. + +-i:: + Specify input perf data file path. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +SEE ALSO +-------- +linkperf:perf[1] +[1] Common Trace Format - http://www.efficios.com/ctf diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index e463caa3eb49..518266192d67 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -20,6 +20,11 @@ If no parameters are passed it will assume perf.data.old and perf.data. The differential profile is displayed only for events matching both specified perf.data files. +If no parameters are passed the samples will be sorted by dso and symbol. +As the perf.data files could come from different binaries, the symbols addresses +could vary. So perf diff is based on the comparison of the files and +symbols name. + OPTIONS ------- -D:: diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 3e2aec94f806..4692d277980b 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -127,6 +127,12 @@ To limit the list use: One or more types can be used at the same time, listing the events for the types specified. +Support raw format: + +. '--raw-dump', shows the raw-dump of all the events. +. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of + a certain kind of events. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-top[1], diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index aaa869be3dc1..239609c09f83 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -47,6 +47,12 @@ OPTIONS -v:: --verbose:: Be more verbose (show parsed arguments, etc). + Can not use with -q. + +-q:: +--quiet:: + Be quiet (do not show any messages including errors). + Can not use with -v. -a:: --add=:: @@ -96,7 +102,7 @@ OPTIONS Dry run. With this option, --add and --del doesn't execute actual adding and removal operations. ---max-probes:: +--max-probes=NUM:: Set the maximum number of probe points for an event. Default is 128. -x:: @@ -104,8 +110,13 @@ OPTIONS Specify path to the executable or shared library file for user space tracing. Can also be used with --funcs option. +--demangle:: + Demangle application symbols. --no-demangle is also available + for disabling demangling. + --demangle-kernel:: - Demangle kernel symbols. + Demangle kernel symbols. --no-demangle-kernel is also available + for disabling kernel demangling. In absence of -m/-x options, perf probe checks if the first argument after the options is an absolute path name. If its an absolute path, perf probe @@ -137,6 +148,7 @@ Each probe argument follows below syntax. [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) +'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point. 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 31e977459c51..cae75c11120f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -115,13 +115,19 @@ OPTIONS implies -g. Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) as the method to collect + (DWARF's CFI - Call Frame Information) or "lbr" + (Hardware Last Branch Record facility) as the method to collect the information used to show the call graphs. In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to the libunwind library) should be used instead. + Using the "lbr" method doesn't require any compiler options. It + will produce call graphs from the hardware LBR registers. The + main limition is that it is only available on new Intel + platforms, such as Haswell. It can only get user call chain. It + doesn't work with branch stack sampling at the same time. -q:: --quiet:: @@ -235,6 +241,9 @@ Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option is off by default. +--running-time:: +Record running and enabled time for read events (:S) + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7e1b1f2bb83c..ba03fd5d1a54 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -55,6 +55,9 @@ OPTIONS --uid=:: Record events in threads owned by uid. Name or number. +--filter-pids=:: + Filter out events for these pids and for 'trace' itself (comma separated list). + -v:: --verbose=:: Verbosity level. @@ -115,6 +118,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --syscalls:: Trace system calls. This options is enabled by default. +--event:: + Trace other events, see 'perf list' for a complete list. + PAGEFAULTS ---------- diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 1e8e400b4493..2b131776363e 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -13,11 +13,16 @@ SYNOPSIS OPTIONS ------- --debug:: - Setup debug variable (just verbose for now) in value + Setup debug variable (see list below) in value range (0, 10). Use like: --debug verbose # sets verbose = 1 --debug verbose=2 # sets verbose = 2 + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + --buildid-dir:: Setup buildid cache directory. It has higher priority than buildid.dir config file option. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fbbfdc39271d..11ccbb22ea2b 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,5 +1,6 @@ tools/perf tools/scripts +tools/build tools/lib/traceevent tools/lib/api tools/lib/symbol/kallsyms.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index aa6a50447c32..ec4c063ed9f3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -68,7 +68,9 @@ include config/utilities.mak # for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode # # Define NO_ZLIB if you do not want to support compressed kernel modules - +# +# Define NO_LIBBABELTRACE if you do not want libbabeltrace support +# for CTF data format. ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -82,13 +84,29 @@ endif ifneq ($(OUTPUT),) #$(info Determined 'OUTPUT' to be $(OUTPUT)) +# Adding $(OUTPUT) as a directory to look for source files, +# because use generated output files as sources dependency +# for flex/bison parsers. +VPATH += $(OUTPUT) +export VPATH +endif + +ifeq ($(V),1) + Q = +else + Q = @ endif +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD - @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) - @touch $(OUTPUT)PERF-VERSION-FILE + $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) + $(Q)touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc +LD = $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar PKG_CONFIG = $(CROSS_COMPILE)pkg-config @@ -127,10 +145,6 @@ export prefix bindir sharedir sysconfdir DESTDIR SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Guard against environment variables -BUILTIN_OBJS = -LIB_H = -LIB_OBJS = -GTK_OBJS = PYRF_OBJS = SCRIPT_SH = @@ -155,8 +169,8 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBAPIKFS = $(LIB_PATH)libapikfs.a -export LIBAPIKFS +LIBAPI = $(LIB_PATH)libapi.a +export LIBAPI # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -167,7 +181,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS) +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ @@ -206,297 +220,9 @@ endif export PERL_PATH -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l - -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y - $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ - -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l - -$(OUTPUT)util/pmu-bison.c: util/pmu.y - $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ - -$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c - LIB_FILE=$(OUTPUT)libperf.a -LIB_H += ../lib/symbol/kallsyms.h -LIB_H += ../../include/uapi/linux/perf_event.h -LIB_H += ../../include/linux/rbtree.h -LIB_H += ../../include/linux/list.h -LIB_H += ../../include/uapi/linux/const.h -LIB_H += ../include/linux/hash.h -LIB_H += ../../include/linux/stringify.h -LIB_H += util/include/linux/bitmap.h -LIB_H += ../include/linux/bitops.h -LIB_H += ../include/asm-generic/bitops/arch_hweight.h -LIB_H += ../include/asm-generic/bitops/atomic.h -LIB_H += ../include/asm-generic/bitops/const_hweight.h -LIB_H += ../include/asm-generic/bitops/find.h -LIB_H += ../include/asm-generic/bitops/fls64.h -LIB_H += ../include/asm-generic/bitops/fls.h -LIB_H += ../include/asm-generic/bitops/__ffs.h -LIB_H += ../include/asm-generic/bitops/__fls.h -LIB_H += ../include/asm-generic/bitops/hweight.h -LIB_H += ../include/asm-generic/bitops.h -LIB_H += ../include/linux/compiler.h -LIB_H += ../include/linux/log2.h -LIB_H += util/include/linux/const.h -LIB_H += util/include/linux/ctype.h -LIB_H += util/include/linux/kernel.h -LIB_H += util/include/linux/list.h -LIB_H += ../include/linux/export.h -LIB_H += util/include/linux/poison.h -LIB_H += util/include/linux/rbtree.h -LIB_H += util/include/linux/rbtree_augmented.h -LIB_H += util/include/linux/string.h -LIB_H += ../include/linux/types.h -LIB_H += util/include/linux/linkage.h -LIB_H += util/include/asm/asm-offsets.h -LIB_H += ../include/asm/bug.h -LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/swab.h -LIB_H += util/include/asm/system.h -LIB_H += util/include/asm/uaccess.h -LIB_H += util/include/dwarf-regs.h -LIB_H += util/include/asm/dwarf2.h -LIB_H += util/include/asm/cpufeature.h -LIB_H += util/include/asm/unistd_32.h -LIB_H += util/include/asm/unistd_64.h -LIB_H += perf.h -LIB_H += util/annotate.h -LIB_H += util/cache.h -LIB_H += util/callchain.h -LIB_H += util/build-id.h -LIB_H += util/db-export.h -LIB_H += util/debug.h -LIB_H += util/pmu.h -LIB_H += util/event.h -LIB_H += util/evsel.h -LIB_H += util/evlist.h -LIB_H += util/exec_cmd.h -LIB_H += util/find-vdso-map.c -LIB_H += util/levenshtein.h -LIB_H += util/machine.h -LIB_H += util/map.h -LIB_H += util/parse-options.h -LIB_H += util/parse-events.h -LIB_H += util/quote.h -LIB_H += util/util.h -LIB_H += util/xyarray.h -LIB_H += util/header.h -LIB_H += util/help.h -LIB_H += util/session.h -LIB_H += util/ordered-events.h -LIB_H += util/strbuf.h -LIB_H += util/strlist.h -LIB_H += util/strfilter.h -LIB_H += util/svghelper.h -LIB_H += util/tool.h -LIB_H += util/run-command.h -LIB_H += util/sigchain.h -LIB_H += util/dso.h -LIB_H += util/symbol.h -LIB_H += util/color.h -LIB_H += util/values.h -LIB_H += util/sort.h -LIB_H += util/hist.h -LIB_H += util/comm.h -LIB_H += util/thread.h -LIB_H += util/thread_map.h -LIB_H += util/trace-event.h -LIB_H += util/probe-finder.h -LIB_H += util/dwarf-aux.h -LIB_H += util/probe-event.h -LIB_H += util/pstack.h -LIB_H += util/cpumap.h -LIB_H += util/top.h -LIB_H += $(ARCH_INCLUDE) -LIB_H += util/cgroup.h -LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h -LIB_H += util/target.h -LIB_H += util/rblist.h -LIB_H += util/intlist.h -LIB_H += util/perf_regs.h -LIB_H += util/unwind.h -LIB_H += util/vdso.h -LIB_H += util/tsc.h -LIB_H += ui/helpline.h -LIB_H += ui/progress.h -LIB_H += ui/util.h -LIB_H += ui/ui.h -LIB_H += util/data.h -LIB_H += util/kvm-stat.h -LIB_H += util/thread-stack.h - -LIB_OBJS += $(OUTPUT)util/abspath.o -LIB_OBJS += $(OUTPUT)util/alias.o -LIB_OBJS += $(OUTPUT)util/annotate.o -LIB_OBJS += $(OUTPUT)util/build-id.o -LIB_OBJS += $(OUTPUT)util/config.o -LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/db-export.o -LIB_OBJS += $(OUTPUT)util/pmu.o -LIB_OBJS += $(OUTPUT)util/environment.o -LIB_OBJS += $(OUTPUT)util/event.o -LIB_OBJS += $(OUTPUT)util/evlist.o -LIB_OBJS += $(OUTPUT)util/evsel.o -LIB_OBJS += $(OUTPUT)util/exec_cmd.o -LIB_OBJS += $(OUTPUT)util/find_next_bit.o -LIB_OBJS += $(OUTPUT)util/help.o -LIB_OBJS += $(OUTPUT)util/kallsyms.o -LIB_OBJS += $(OUTPUT)util/levenshtein.o -LIB_OBJS += $(OUTPUT)util/parse-options.o -LIB_OBJS += $(OUTPUT)util/parse-events.o -LIB_OBJS += $(OUTPUT)util/path.o -LIB_OBJS += $(OUTPUT)util/rbtree.o -LIB_OBJS += $(OUTPUT)util/bitmap.o -LIB_OBJS += $(OUTPUT)util/hweight.o -LIB_OBJS += $(OUTPUT)util/run-command.o -LIB_OBJS += $(OUTPUT)util/quote.o -LIB_OBJS += $(OUTPUT)util/strbuf.o -LIB_OBJS += $(OUTPUT)util/string.o -LIB_OBJS += $(OUTPUT)util/strlist.o -LIB_OBJS += $(OUTPUT)util/strfilter.o -LIB_OBJS += $(OUTPUT)util/top.o -LIB_OBJS += $(OUTPUT)util/usage.o -LIB_OBJS += $(OUTPUT)util/wrapper.o -LIB_OBJS += $(OUTPUT)util/sigchain.o -LIB_OBJS += $(OUTPUT)util/dso.o -LIB_OBJS += $(OUTPUT)util/symbol.o -LIB_OBJS += $(OUTPUT)util/symbol-elf.o -LIB_OBJS += $(OUTPUT)util/color.o -LIB_OBJS += $(OUTPUT)util/pager.o -LIB_OBJS += $(OUTPUT)util/header.o -LIB_OBJS += $(OUTPUT)util/callchain.o -LIB_OBJS += $(OUTPUT)util/values.o -LIB_OBJS += $(OUTPUT)util/debug.o -LIB_OBJS += $(OUTPUT)util/machine.o -LIB_OBJS += $(OUTPUT)util/map.o -LIB_OBJS += $(OUTPUT)util/pstack.o -LIB_OBJS += $(OUTPUT)util/session.o -LIB_OBJS += $(OUTPUT)util/ordered-events.o -LIB_OBJS += $(OUTPUT)util/comm.o -LIB_OBJS += $(OUTPUT)util/thread.o -LIB_OBJS += $(OUTPUT)util/thread_map.o -LIB_OBJS += $(OUTPUT)util/trace-event-parse.o -LIB_OBJS += $(OUTPUT)util/parse-events-flex.o -LIB_OBJS += $(OUTPUT)util/parse-events-bison.o -LIB_OBJS += $(OUTPUT)util/pmu-flex.o -LIB_OBJS += $(OUTPUT)util/pmu-bison.o -LIB_OBJS += $(OUTPUT)util/trace-event-read.o -LIB_OBJS += $(OUTPUT)util/trace-event-info.o -LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o -LIB_OBJS += $(OUTPUT)util/trace-event.o -LIB_OBJS += $(OUTPUT)util/svghelper.o -LIB_OBJS += $(OUTPUT)util/sort.o -LIB_OBJS += $(OUTPUT)util/hist.o -LIB_OBJS += $(OUTPUT)util/probe-event.o -LIB_OBJS += $(OUTPUT)util/util.o -LIB_OBJS += $(OUTPUT)util/xyarray.o -LIB_OBJS += $(OUTPUT)util/cpumap.o -LIB_OBJS += $(OUTPUT)util/cgroup.o -LIB_OBJS += $(OUTPUT)util/target.o -LIB_OBJS += $(OUTPUT)util/rblist.o -LIB_OBJS += $(OUTPUT)util/intlist.o -LIB_OBJS += $(OUTPUT)util/vdso.o -LIB_OBJS += $(OUTPUT)util/stat.o -LIB_OBJS += $(OUTPUT)util/record.o -LIB_OBJS += $(OUTPUT)util/srcline.o -LIB_OBJS += $(OUTPUT)util/data.o -LIB_OBJS += $(OUTPUT)util/tsc.o -LIB_OBJS += $(OUTPUT)util/cloexec.o -LIB_OBJS += $(OUTPUT)util/thread-stack.o - -LIB_OBJS += $(OUTPUT)ui/setup.o -LIB_OBJS += $(OUTPUT)ui/helpline.o -LIB_OBJS += $(OUTPUT)ui/progress.o -LIB_OBJS += $(OUTPUT)ui/util.o -LIB_OBJS += $(OUTPUT)ui/hist.o -LIB_OBJS += $(OUTPUT)ui/stdio/hist.o - -LIB_OBJS += $(OUTPUT)arch/common.o - -LIB_OBJS += $(OUTPUT)tests/parse-events.o -LIB_OBJS += $(OUTPUT)tests/dso-data.o -LIB_OBJS += $(OUTPUT)tests/attr.o -LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o -LIB_OBJS += $(OUTPUT)tests/open-syscall.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o -LIB_OBJS += $(OUTPUT)tests/mmap-basic.o -LIB_OBJS += $(OUTPUT)tests/perf-record.o -LIB_OBJS += $(OUTPUT)tests/rdpmc.o -LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o -LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o -LIB_OBJS += $(OUTPUT)tests/fdarray.o -LIB_OBJS += $(OUTPUT)tests/pmu.o -LIB_OBJS += $(OUTPUT)tests/hists_common.o -LIB_OBJS += $(OUTPUT)tests/hists_link.o -LIB_OBJS += $(OUTPUT)tests/hists_filter.o -LIB_OBJS += $(OUTPUT)tests/hists_output.o -LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o -LIB_OBJS += $(OUTPUT)tests/python-use.o -LIB_OBJS += $(OUTPUT)tests/bp_signal.o -LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o -LIB_OBJS += $(OUTPUT)tests/task-exit.o -LIB_OBJS += $(OUTPUT)tests/sw-clock.o -ifeq ($(ARCH),x86) -LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o -endif -LIB_OBJS += $(OUTPUT)tests/code-reading.o -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o -LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o -ifndef NO_DWARF_UNWIND -ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) -LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o -endif -endif -LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o -LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o -LIB_OBJS += $(OUTPUT)tests/switch-tracking.o - -BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o -BUILTIN_OBJS += $(OUTPUT)builtin-bench.o -# Benchmark modules -BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o -BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(ARCH), x86) -ifeq ($(IS_64_BIT), 1) -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o -endif -endif -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o - -BUILTIN_OBJS += $(OUTPUT)builtin-diff.o -BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o -BUILTIN_OBJS += $(OUTPUT)builtin-help.o -BUILTIN_OBJS += $(OUTPUT)builtin-sched.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o -BUILTIN_OBJS += $(OUTPUT)builtin-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-record.o -BUILTIN_OBJS += $(OUTPUT)builtin-report.o -BUILTIN_OBJS += $(OUTPUT)builtin-stat.o -BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o -BUILTIN_OBJS += $(OUTPUT)builtin-top.o -BUILTIN_OBJS += $(OUTPUT)builtin-script.o -BUILTIN_OBJS += $(OUTPUT)builtin-probe.o -BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o -BUILTIN_OBJS += $(OUTPUT)builtin-lock.o -BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o -BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o -BUILTIN_OBJS += $(OUTPUT)builtin-mem.o - -PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) +PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If @@ -508,67 +234,9 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifdef NO_LIBELF -# Remove ELF/DWARF dependent codes -LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) - -BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) - -# Use minimal symbol handling -LIB_OBJS += $(OUTPUT)util/symbol-minimal.o - -else # NO_LIBELF -ifndef NO_DWARF - LIB_OBJS += $(OUTPUT)util/probe-finder.o - LIB_OBJS += $(OUTPUT)util/dwarf-aux.o -endif # NO_DWARF -endif # NO_LIBELF - -ifndef NO_LIBDW_DWARF_UNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libdw.o - LIB_H += util/unwind-libdw.h -endif - -ifndef NO_LIBUNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o -endif -LIB_OBJS += $(OUTPUT)tests/keep-tracking.o - -ifndef NO_LIBAUDIT - BUILTIN_OBJS += $(OUTPUT)builtin-trace.o -endif - -ifndef NO_SLANG - LIB_OBJS += $(OUTPUT)ui/browser.o - LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o - LIB_OBJS += $(OUTPUT)ui/browsers/hists.o - LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o - LIB_OBJS += $(OUTPUT)ui/browsers/header.o - LIB_OBJS += $(OUTPUT)ui/tui/setup.o - LIB_OBJS += $(OUTPUT)ui/tui/util.o - LIB_OBJS += $(OUTPUT)ui/tui/helpline.o - LIB_OBJS += $(OUTPUT)ui/tui/progress.o - LIB_H += ui/tui/tui.h - LIB_H += ui/browser.h - LIB_H += ui/browsers/map.h - LIB_H += ui/keysyms.h - LIB_H += ui/libslang.h -endif - ifndef NO_GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so - - GTK_OBJS += $(OUTPUT)ui/gtk/browser.o - GTK_OBJS += $(OUTPUT)ui/gtk/hists.o - GTK_OBJS += $(OUTPUT)ui/gtk/setup.o - GTK_OBJS += $(OUTPUT)ui/gtk/util.o - GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o - GTK_OBJS += $(OUTPUT)ui/gtk/progress.o - GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o + GTK_IN := $(OUTPUT)gtk-in.o install-gtk: $(OUTPUT)libperf-gtk.so $(call QUIET_INSTALL, 'GTK UI') \ @@ -576,31 +244,6 @@ install-gtk: $(OUTPUT)libperf-gtk.so $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif -ifndef NO_LIBPERL - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o - LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o -endif - -ifndef NO_LIBPYTHON - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o - LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o -endif - -ifeq ($(NO_PERF_REGS),0) - ifeq ($(ARCH),x86) - LIB_H += arch/x86/include/perf_regs.h - endif - LIB_OBJS += $(OUTPUT)util/perf_regs.o -endif - -ifndef NO_LIBNUMA - BUILTIN_OBJS += $(OUTPUT)bench/numa.o -endif - -ifndef NO_ZLIB - LIB_OBJS += $(OUTPUT)util/zlib.o -endif - ifdef ASCIIDOC8 export ASCIIDOC8 endif @@ -616,39 +259,29 @@ SHELL = $(SHELL_PATH) all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) please_set_SHELL_PATH_to_a_more_modern_shell: - @$$(:) + $(Q)$$(:) shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell strip: $(PROGRAMS) $(OUTPUT)perf $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf -$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - $(CFLAGS) -c $(filter %.c,$^) -o $@ +PERF_IN := $(OUTPUT)perf-in.o -$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(LIBS) -o $@ +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) - $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< +$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE + $(Q)$(MAKE) $(build)=perf -$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ -$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(GTK_IN): FORCE + $(Q)$(MAKE) $(build)=gtk -$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) + $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt @@ -659,8 +292,7 @@ $(SCRIPTS) : % : %.sh $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' # These can record PERF_VERSION -$(OUTPUT)perf.o perf.spec \ - $(SCRIPTS) \ +perf.spec $(SCRIPTS) \ : $(OUTPUT)PERF-VERSION-FILE .SUFFIXES: @@ -683,90 +315,33 @@ endif # These two need to be here so that when O= is not used they take precedence # over the general rule for .o -$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $< - -$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< - -$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $< -$(OUTPUT)%.o: %.S - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.s: %.S - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< - -$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ - '-DPREFIX="$(prefix_SQ)"' \ - $< - -$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - -DPYTHONPATH='"$(OUTPUT)python"' \ - -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $< +# get relative building directory (to $(OUTPUT)) +# and '.' if it's $(OUTPUT) itself +__build-dir = $(subst $(OUTPUT),,$(dir $@)) +build-dir = $(if $(__build-dir),$(__build-dir),.) -$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $< +$(OUTPUT)%.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.i: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.s: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-bison.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-flex.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.o: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< - -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< - -$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< - -$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $< - -$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< - -$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< +$(OUTPUT)%.i: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -781,58 +356,34 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c endif -$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So -# we depend the various files onto their directories. -DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS) -DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -# no need to add flex objects, because they depend on bison ones -DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c -DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c +LIBPERF_IN := $(OUTPUT)libperf-in.o -OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS))) +$(LIBPERF_IN): FORCE + $(Q)$(MAKE) $(build)=libperf -$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES) -# In the second step, we make a rule to actually create these directories -$(OUTPUT_DIRECTORIES): - $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null +$(LIB_FILE): $(LIBPERF_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -$(LIB_FILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) - -# libtraceevent.a -TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) - -LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT) -LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)" LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins +$(LIBTRACEEVENT): FORCE + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null install-traceevent-plugins: $(LIBTRACEEVENT) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch]) - -# if subdir is set, we've been called from above so target has been built -# already -$(LIBAPIKFS): $(LIBAPIKFS_SOURCES) -ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a -endif +$(LIBAPI): FORCE + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a -$(LIBAPIKFS)-clean: -ifeq ($(subdir),) - $(call QUIET_CLEAN, libapikfs) - @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -endif +$(LIBAPI)-clean: + $(call QUIET_CLEAN, libapi) + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null help: @echo 'Perf make targets:' @@ -888,17 +439,6 @@ cscope: $(QUIET_GEN)$(RM) cscope*; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) -### Detect prefix changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ) - -$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or prefix"; \ - echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ - fi - ### Testing rules # GNU make supports exporting all variables by "export" without parameters. @@ -981,12 +521,14 @@ $(INSTALL_DOC_TARGETS): # config-clean: $(call QUIET_CLEAN, config) - @$(MAKE) -C config/feature-checks clean >/dev/null + $(Q)$(MAKE) -C config/feature-checks clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) +clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) @@ -1000,7 +542,9 @@ else GIT-HEAD-PHONY = endif +FORCE: + .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build new file mode 100644 index 000000000000..109eb75cf7de --- /dev/null +++ b/tools/perf/arch/Build @@ -0,0 +1,2 @@ +libperf-y += common.o +libperf-y += $(ARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/arm/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 09d62153d384..7fbca175099e 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,14 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/arm/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build new file mode 100644 index 000000000000..d22e3d07de3d --- /dev/null +++ b/tools/perf/arch/arm/util/Build @@ -0,0 +1,4 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/arm64/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 67e9b3d38e89..7fbca175099e 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,7 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o endif diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build new file mode 100644 index 000000000000..e58123a8912b --- /dev/null +++ b/tools/perf/arch/arm64/util/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/powerpc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 6f7782bea5dd..7fbca175099e 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,6 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build new file mode 100644 index 000000000000..0af6e9b3f728 --- /dev/null +++ b/tools/perf/arch/powerpc/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/s390/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 798ac7379c5f..21322e0385b8 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,7 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build new file mode 100644 index 000000000000..8a61372bb47a --- /dev/null +++ b/tools/perf/arch/s390/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sh/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sh/Makefile +++ b/tools/perf/arch/sh/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sh/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sparc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sparc/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/x86/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 9b21881db52f..21322e0385b8 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,19 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o -endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o -LIB_H += arch/$(ARCH)/util/tsc.h HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/x86/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build new file mode 100644 index 000000000000..cfbccc4e3187 --- /dev/null +++ b/tools/perf/arch/x86/util/Build @@ -0,0 +1,8 @@ +libperf-y += header.o +libperf-y += tsc.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build new file mode 100644 index 000000000000..5ce98023d518 --- /dev/null +++ b/tools/perf/bench/Build @@ -0,0 +1,11 @@ +perf-y += sched-messaging.o +perf-y += sched-pipe.o +perf-y += mem-memcpy.o +perf-y += futex-hash.o +perf-y += futex-wake.o +perf-y += futex-requeue.o + +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o +perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o + +perf-$(CONFIG_NUMA) += numa.o diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 50e6b66aea1f..d47a0cdc71c9 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -125,8 +125,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, return ret; } -static int build_id_cache__add_kcore(const char *filename, const char *debugdir, - bool force) +static int build_id_cache__add_kcore(const char *filename, bool force) { char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; char from_dir[PATH_MAX], to_dir[PATH_MAX]; @@ -143,7 +142,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - debugdir, sbuildid); + buildid_dir, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -155,7 +154,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - debugdir, sbuildid, dir); + buildid_dir, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; @@ -183,7 +182,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return 0; } -static int build_id_cache__add_file(const char *filename, const char *debugdir) +static int build_id_cache__add_file(const char *filename) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; u8 build_id[BUILD_ID_SIZE]; @@ -195,16 +194,14 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, debugdir, filename, + err = build_id_cache__add_s(sbuild_id, filename, false, false); - if (verbose) - pr_info("Adding %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + pr_debug("Adding %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } -static int build_id_cache__remove_file(const char *filename, - const char *debugdir) +static int build_id_cache__remove_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -217,10 +214,34 @@ static int build_id_cache__remove_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (verbose) - pr_info("Removing %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + err = build_id_cache__remove_s(sbuild_id); + pr_debug("Removing %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); + + return err; +} + +static int build_id_cache__purge_path(const char *pathname) +{ + struct strlist *list; + struct str_node *pos; + int err; + + err = build_id_cache__list_build_ids(pathname, &list); + if (err) + goto out; + + strlist__for_each(pos, list) { + err = build_id_cache__remove_s(pos->s); + pr_debug("Removing %s %s: %s\n", pos->s, pathname, + err ? "FAIL" : "Ok"); + if (err) + break; + } + strlist__delete(list); + +out: + pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok"); return err; } @@ -252,13 +273,12 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f return 0; } -static int build_id_cache__update_file(const char *filename, - const char *debugdir) +static int build_id_cache__update_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - int err; + int err = 0; if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -266,14 +286,14 @@ static int build_id_cache__update_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (!err) { - err = build_id_cache__add_s(sbuild_id, debugdir, filename, - false, false); - } - if (verbose) - pr_info("Updating %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + if (build_id_cache__cached(sbuild_id)) + err = build_id_cache__remove_s(sbuild_id); + + if (!err) + err = build_id_cache__add_s(sbuild_id, filename, false, false); + + pr_debug("Updating %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } @@ -287,6 +307,7 @@ int cmd_buildid_cache(int argc, const char **argv, bool force = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, + *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, *kcore_filename = NULL; @@ -304,6 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv, "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), + OPT_STRING('p', "purge", &purge_name_list_str, "path list", + "path(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -320,6 +343,11 @@ int cmd_buildid_cache(int argc, const char **argv, argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); + if (argc || (!add_name_list_str && !kcore_filename && + !remove_name_list_str && !purge_name_list_str && + !missing_filename && !update_name_list_str)) + usage_with_options(buildid_cache_usage, buildid_cache_options); + if (missing_filename) { file.path = missing_filename; file.force = force; @@ -338,7 +366,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, add_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__add_file(pos->s, buildid_dir)) { + if (build_id_cache__add_file(pos->s)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -356,7 +384,25 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, remove_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__remove_file(pos->s, buildid_dir)) { + if (build_id_cache__remove_file(pos->s)) { + if (errno == ENOENT) { + pr_debug("%s wasn't in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't remove %s: %s\n", + pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + } + + strlist__delete(list); + } + } + + if (purge_name_list_str) { + list = strlist__new(true, purge_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__purge_path(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -377,7 +423,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, update_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__update_file(pos->s, buildid_dir)) { + if (build_id_cache__update_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -391,8 +437,7 @@ int cmd_buildid_cache(int argc, const char **argv, } } - if (kcore_filename && - build_id_cache__add_kcore(kcore_filename, buildid_dir, force)) + if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force)) pr_warning("Couldn't add %s\n", kcore_filename); out: diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c new file mode 100644 index 000000000000..155cf75b8199 --- /dev/null +++ b/tools/perf/builtin-data.c @@ -0,0 +1,119 @@ +#include <linux/compiler.h> +#include "builtin.h" +#include "perf.h" +#include "debug.h" +#include "parse-options.h" +#include "data-convert-bt.h" + +typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); + +struct data_cmd { + const char *name; + const char *summary; + data_cmd_fn_t fn; +}; + +static struct data_cmd data_cmds[]; + +#define for_each_cmd(cmd) \ + for (cmd = data_cmds; cmd && cmd->name; cmd++) + +static const struct option data_options[] = { + OPT_END() +}; + +static const char * const data_usage[] = { + "perf data [<common options>] <command> [<options>]", + NULL +}; + +static void print_usage(void) +{ + struct data_cmd *cmd; + + printf("Usage:\n"); + printf("\t%s\n\n", data_usage[0]); + printf("\tAvailable commands:\n"); + + for_each_cmd(cmd) { + printf("\t %s\t- %s\n", cmd->name, cmd->summary); + } + + printf("\n"); +} + +static const char * const data_convert_usage[] = { + "perf data convert [<options>]", + NULL +}; + +static int cmd_data_convert(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + const char *to_ctf = NULL; + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_STRING('i', "input", &input_name, "file", "input file name"), +#ifdef HAVE_LIBBABELTRACE_SUPPORT + OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), +#endif + OPT_END() + }; + +#ifndef HAVE_LIBBABELTRACE_SUPPORT + pr_err("No conversion support compiled in.\n"); + return -1; +#endif + + argc = parse_options(argc, argv, options, + data_convert_usage, 0); + if (argc) { + usage_with_options(data_convert_usage, options); + return -1; + } + + if (to_ctf) { +#ifdef HAVE_LIBBABELTRACE_SUPPORT + return bt_convert__perf2ctf(input_name, to_ctf); +#else + pr_err("The libbabeltrace support is not compiled in.\n"); + return -1; +#endif + } + + return 0; +} + +static struct data_cmd data_cmds[] = { + { "convert", "converts data file between formats", cmd_data_convert }, + { .name = NULL, }, +}; + +int cmd_data(int argc, const char **argv, const char *prefix) +{ + struct data_cmd *cmd; + const char *cmdstr; + + /* No command specified. */ + if (argc < 2) + goto usage; + + argc = parse_options(argc, argv, data_options, data_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc < 1) + goto usage; + + cmdstr = argv[0]; + + for_each_cmd(cmd) { + if (strcmp(cmd->name, cmdstr)) + continue; + + return cmd->fn(argc, argv, prefix); + } + + pr_err("Unknown command: %s\n", cmdstr); +usage: + print_usage(); + return -1; +} diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 198f3c3aff95..af5bd0514108 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -36,38 +36,36 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) setup_pager(); - if (raw_dump) { - print_events(NULL, true); - return 0; - } + if (!raw_dump) + printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, false); + print_events(NULL, raw_dump); return 0; } for (i = 0; i < argc; ++i) { - if (i) - putchar('\n'); - if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL, false); + if (strcmp(argv[i], "tracepoint") == 0) + print_tracepoint_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_events_type(PERF_TYPE_HARDWARE); + print_symbol_events(NULL, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) - print_events_type(PERF_TYPE_SOFTWARE); + print_symbol_events(NULL, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, false); + print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, false); + print_pmu_events(NULL, raw_dump); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], false); + print_events(argv[i], raw_dump); continue; } sep_idx = sep - argv[i]; @@ -76,7 +74,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, false); + print_tracepoint_events(s, s + sep_idx + 1, raw_dump); free(s); } } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 404ab3434052..4fdad06d37db 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -658,7 +658,7 @@ error: static void callchain_debug(void) { - static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; + static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); @@ -751,9 +751,9 @@ static struct record record = { #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef HAVE_DWARF_UNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; #else -const char record_callchain_help[] = CALLCHAIN_HELP "fp"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; #endif /* @@ -839,6 +839,8 @@ struct option __record_options[] = { "use per-thread mmaps"), OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, "Sample machine registers on interrupt"), + OPT_BOOLEAN(0, "running-time", &record.opts.running_time, + "Record running/enabled time of read (:S) events"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2f91094e228b..fb350343b1d7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep) if ((sample_type & PERF_SAMPLE_REGS_USER) && (sample_type & PERF_SAMPLE_STACK_USER)) callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; } @@ -766,7 +768,7 @@ repeat: * 0/1 means the user chose a mode. */ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && - branch_call_mode == -1) { + !branch_call_mode) { sort__mode = SORT_MODE__BRANCH; symbol_conf.cumulate_callchain = false; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 891c3930080e..7ce296618717 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1473,9 +1473,9 @@ static int perf_sched__read_events(struct perf_sched *sched, goto out_delete; } - sched->nr_events = session->stats.nr_events[0]; - sched->nr_lost_events = session->stats.total_lost; - sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->evlist->stats.nr_events[0]; + sched->nr_lost_events = session->evlist->stats.total_lost; + sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } if (psession) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c4c7eac69de4..5fb8723c7128 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -716,7 +716,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine) { pr_err("%u unprocessable samples recorded.\r", - top->session->stats.nr_unprocessable_samples++); + top->session->evlist->stats.nr_unprocessable_samples++); return; } @@ -856,7 +856,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) hists__inc_nr_events(evsel__hists(evsel), event->header.type); machine__process_event(machine, event, &sample); } else - ++session->stats.nr_unknown_events; + ++session->evlist->stats.nr_unknown_events; next_event: perf_evlist__mmap_consume(top->evlist, idx); } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7e935f1083ec..d95a8f4d988c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -52,7 +52,9 @@ struct tp_field { #define TP_UINT_FIELD(bits) \ static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - return *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ + return value; \ } TP_UINT_FIELD(8); @@ -63,7 +65,8 @@ TP_UINT_FIELD(64); #define TP_UINT_FIELD__SWAPPED(bits) \ static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ return bswap_##bits(value);\ } @@ -1219,7 +1222,9 @@ struct trace { struct syscall *table; } syscalls; struct record_opts opts; + struct perf_evlist *evlist; struct machine *host; + struct thread *current; u64 base_time; FILE *output; unsigned long nr_events; @@ -1227,6 +1232,10 @@ struct trace { const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; + struct { + size_t nr; + pid_t *entries; + } filter_pids; double duration_filter; double runtime_ms; struct { @@ -1511,11 +1520,22 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } +/* + * args is to be interpreted as a series of longs but we need to handle + * 8-byte unaligned accesses. args points to raw_data within the event + * and raw_data is guaranteed to be 8-byte unaligned because it is + * preceded by raw_size which is a u32. So we need to copy args to a temp + * variable to read it. Most notably this avoids extended load instructions + * on unaligned addresses + */ + static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned long *args, struct trace *trace, + unsigned char *args, struct trace *trace, struct thread *thread) { size_t printed = 0; + unsigned char *p; + unsigned long val; if (sc->tp_format != NULL) { struct format_field *field; @@ -1531,12 +1551,17 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, field = field->next, ++arg.idx, bit <<= 1) { if (arg.mask & bit) continue; + + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * arg.idx; + memcpy(&val, p, sizeof(val)); + /* * Suppress this argument if its value is zero and * and we don't have a string associated in an * strarray for it. */ - if (args[arg.idx] == 0 && + if (val == 0 && !(sc->arg_scnprintf && sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && sc->arg_parm[arg.idx])) @@ -1545,23 +1570,26 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { - arg.val = args[arg.idx]; + arg.val = val; if (sc->arg_parm) arg.parm = sc->arg_parm[arg.idx]; printed += sc->arg_scnprintf[arg.idx](bf + printed, size - printed, &arg); } else { printed += scnprintf(bf + printed, size - printed, - "%ld", args[arg.idx]); + "%ld", val); } } } else { int i = 0; while (i < 6) { + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * i; + memcpy(&val, p, sizeof(val)); printed += scnprintf(bf + printed, size - printed, "%sarg%d: %ld", - printed ? ", " : "", i, args[i]); + printed ? ", " : "", i, val); ++i; } } @@ -1642,6 +1670,29 @@ static void thread__update_stats(struct thread_trace *ttrace, update_stats(stats, duration); } +static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) +{ + struct thread_trace *ttrace; + u64 duration; + size_t printed; + + if (trace->current == NULL) + return 0; + + ttrace = thread__priv(trace->current); + + if (!ttrace->entry_pending) + return 0; + + duration = sample->time - ttrace->entry_time; + + printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); + printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); + ttrace->entry_pending = false; + + return printed; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1673,6 +1724,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, return -1; } + printed += trace__printf_interrupted_entry(trace, sample); + ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); @@ -1688,6 +1741,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } else ttrace->entry_pending = true; + trace->current = thread; + return 0; } @@ -1805,6 +1860,28 @@ out_dump: return 0; } +static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, + struct perf_sample *sample) +{ + trace__printf_interrupted_entry(trace, sample); + trace__fprintf_tstamp(trace, sample->time, trace->output); + + if (trace->trace_syscalls) + fprintf(trace->output, "( ): "); + + fprintf(trace->output, "%s:", evsel->name); + + if (evsel->tp_format) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + } + + fprintf(trace->output, ")\n"); + return 0; +} + static void print_location(FILE *f, struct perf_sample *sample, struct addr_location *al, bool print_dso, bool print_sym) @@ -2037,10 +2114,39 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, return 0; } -static int trace__run(struct trace *trace, int argc, const char **argv) +static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) { - struct perf_evlist *evlist = perf_evlist__new(); + const u32 type = event->header.type; struct perf_evsel *evsel; + + if (!trace->full_time && trace->base_time == 0) + trace->base_time = sample->time; + + if (type != PERF_RECORD_SAMPLE) { + trace__process_event(trace, trace->host, event, sample); + return; + } + + evsel = perf_evlist__id2evsel(trace->evlist, sample->id); + if (evsel == NULL) { + fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); + return; + } + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && + sample->raw_data == NULL) { + fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", + perf_evsel__name(evsel), sample->tid, + sample->cpu, sample->raw_size); + } else { + tracepoint_handler handler = evsel->handler; + handler(trace, evsel, event, sample); + } +} + +static int trace__run(struct trace *trace, int argc, const char **argv) +{ + struct perf_evlist *evlist = trace->evlist; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2048,11 +2154,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; - if (evlist == NULL) { - fprintf(trace->output, "Not enough memory to run!\n"); - goto out; - } - if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) @@ -2105,16 +2206,34 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + /* + * Better not use !target__has_task() here because we need to cover the + * case where no threads were specified in the command line, but a + * workload was, and in that case we will fill in the thread_map when + * we fork the workload in perf_evlist__prepare_workload. + */ + if (trace->filter_pids.nr > 0) + err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); + else if (evlist->threads->map[0] == -1) + err = perf_evlist__set_filter_pid(evlist, getpid()); + + if (err < 0) { + printf("err=%d,%s\n", -err, strerror(-err)); + exit(1); + } + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) goto out_error_mmap; - perf_evlist__enable(evlist); - if (forks) perf_evlist__start_workload(evlist); + else + perf_evlist__enable(evlist); - trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; + trace->multiple_threads = evlist->threads->map[0] == -1 || + evlist->threads->nr > 1 || + perf_evlist__first(evlist)->attr.inherit; again: before = trace->nr_events; @@ -2122,8 +2241,6 @@ again: union perf_event *event; while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { - const u32 type = event->header.type; - tracepoint_handler handler; struct perf_sample sample; ++trace->nr_events; @@ -2134,30 +2251,7 @@ again: goto next_event; } - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample.time; - - if (type != PERF_RECORD_SAMPLE) { - trace__process_event(trace, trace->host, event, &sample); - continue; - } - - evsel = perf_evlist__id2evsel(evlist, sample.id); - if (evsel == NULL) { - fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); - goto next_event; - } - - if (evsel->attr.type == PERF_TYPE_TRACEPOINT && - sample.raw_data == NULL) { - fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", - perf_evsel__name(evsel), sample.tid, - sample.cpu, sample.raw_size); - goto next_event; - } - - handler = evsel->handler; - handler(trace, evsel, event, &sample); + trace__handle_event(trace, event, &sample); next_event: perf_evlist__mmap_consume(evlist, i); @@ -2197,7 +2291,7 @@ out_disable: out_delete_evlist: perf_evlist__delete(evlist); -out: + trace->evlist = NULL; trace->live = false; return err; { @@ -2434,6 +2528,38 @@ static int trace__set_duration(const struct option *opt, const char *str, return 0; } +static int trace__set_filter_pids(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int ret = -1; + size_t i; + struct trace *trace = opt->value; + /* + * FIXME: introduce a intarray class, plain parse csv and create a + * { int nr, int entries[] } struct... + */ + struct intlist *list = intlist__new(str); + + if (list == NULL) + return -1; + + i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; + trace->filter_pids.entries = calloc(i, sizeof(pid_t)); + + if (trace->filter_pids.entries == NULL) + goto out; + + trace->filter_pids.entries[0] = getpid(); + + for (i = 1; i < trace->filter_pids.nr; ++i) + trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; + + intlist__delete(list); + ret = 0; +out: + return ret; +} + static int trace__open_output(struct trace *trace, const char *filename) { struct stat st; @@ -2468,6 +2594,14 @@ static int parse_pagefaults(const struct option *opt, const char *str, return 0; } +static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + evsel->handler = handler; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const trace_usage[] = { @@ -2502,6 +2636,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { + OPT_CALLBACK(0, "event", &trace.evlist, "event", + "event selector. use 'perf list' to list available events", + parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), @@ -2513,6 +2650,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "trace events on existing process id"), OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", "trace events on existing thread id"), + OPT_CALLBACK(0, "filter-pids", &trace, "float", + "show only events with duration > N.M ms", trace__set_filter_pids), OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", @@ -2543,6 +2682,18 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) int err; char bf[BUFSIZ]; + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); + + trace.evlist = perf_evlist__new(); + if (trace.evlist == NULL) + return -ENOMEM; + + if (trace.evlist == NULL) { + pr_err("Not enough memory to run!\n"); + goto out; + } + argc = parse_options(argc, argv, trace_options, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -2551,6 +2702,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) trace.opts.sample_time = true; } + if (trace.evlist->nr_entries > 0) + evlist__set_evsel_handler(trace.evlist, trace__event_handler); + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); @@ -2558,7 +2712,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (trace.summary_only) trace.summary = trace.summary_only; - if (!trace.trace_syscalls && !trace.trace_pgfaults) { + if (!trace.trace_syscalls && !trace.trace_pgfaults && + trace.evlist->nr_entries == 0 /* Was --events used? */) { pr_err("Please specify something to trace.\n"); return -1; } diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index b210d62907e4..3688ad29085f 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -37,6 +37,7 @@ extern int cmd_test(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int cmd_mem(int argc, const char **argv, const char *prefix); +extern int cmd_data(int argc, const char **argv, const char *prefix); extern int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 0906fc401c52..00fcaf8a5b8d 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,6 +7,7 @@ perf-archive mainporcelain common perf-bench mainporcelain common perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common +perf-data mainporcelain common perf-diff mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index cc224080b525..c3570b5f3bf3 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,19 +11,27 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif +$(shell echo -n > .config-detected) +detected = $(shell echo "$(1)=y" >> .config-detected) +detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected) + LIB_INCLUDE := $(srctree)/tools/lib/ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(src-perf)/config/Makefile.arch +$(call detected_var,ARCH) + NO_PERF_REGS := 1 # Additional ARCH settings for x86 ifeq ($(ARCH),x86) + $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 + $(call detected,CONFIG_X86_64) else LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif @@ -40,6 +48,10 @@ ifeq ($(ARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(NO_PERF_REGS),0) + $(call detected,CONFIG_PERF_REGS) +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures @@ -84,6 +96,17 @@ ifndef NO_LIBELF FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw endif +ifndef NO_LIBBABELTRACE + # for linking with debug library, run like: + # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ + ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib + endif + FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) + FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +endif + # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile @@ -114,6 +137,8 @@ ifdef PARSER_DEBUG PARSER_DEBUG_BISON := -t PARSER_DEBUG_FLEX := -d CFLAGS += -DPARSER_DEBUG + $(call detected_var,PARSER_DEBUG_BISON) + $(call detected_var,PARSER_DEBUG_FLEX) endif ifndef NO_LIBPYTHON @@ -202,6 +227,7 @@ CORE_FEATURE_TESTS = \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ + libbabeltrace \ zlib LIB_FEATURE_TESTS = \ @@ -217,6 +243,7 @@ LIB_FEATURE_TESTS = \ libslang \ libunwind \ libdw-dwarf-unwind \ + libbabeltrace \ zlib VF_FEATURE_TESTS = \ @@ -361,6 +388,7 @@ endif # NO_LIBELF ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf + $(call detected,CONFIG_LIBELF) ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT @@ -381,6 +409,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) EXTLIBS += -ldw + $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF endif # NO_LIBELF @@ -408,9 +437,11 @@ ifdef NO_LIBUNWIND dwarf-post-unwind := 0 else dwarf-post-unwind-text := libdw + $(call detected,CONFIG_LIBDW_DWARF_UNWIND) endif else dwarf-post-unwind-text := libunwind + $(call detected,CONFIG_LIBUNWIND) # Enable libunwind support by default. ifndef NO_LIBDW_DWARF_UNWIND NO_LIBDW_DWARF_UNWIND := 1 @@ -419,6 +450,7 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT + $(call detected,CONFIG_DWARF_UNWIND) else NO_DWARF_UNWIND := 1 endif @@ -447,6 +479,7 @@ ifndef NO_LIBAUDIT else CFLAGS += -DHAVE_LIBAUDIT_SUPPORT EXTLIBS += -laudit + $(call detected,CONFIG_AUDIT) endif endif @@ -463,6 +496,7 @@ ifndef NO_SLANG CFLAGS += -I/usr/include/slang CFLAGS += -DHAVE_SLANG_SUPPORT EXTLIBS += -lslang + $(call detected,CONFIG_SLANG) endif endif @@ -501,6 +535,7 @@ else else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) + $(call detected,CONFIG_LIBPERL) endif endif @@ -560,6 +595,7 @@ else LDFLAGS += $(PYTHON_EMBED_LDFLAGS) EXTLIBS += $(PYTHON_EMBED_LIBADD) LANG_BINDINGS += $(obj-perf)python/perf.so + $(call detected,CONFIG_LIBPYTHON) endif endif endif @@ -617,6 +653,7 @@ ifndef NO_ZLIB ifeq ($(feature-zlib), 1) CFLAGS += -DHAVE_ZLIB_SUPPORT EXTLIBS += -lz + $(call detected,CONFIG_ZLIB) else NO_ZLIB := 1 endif @@ -635,6 +672,7 @@ ifndef NO_LIBNUMA else CFLAGS += -DHAVE_LIBNUMA_SUPPORT EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) endif endif @@ -667,6 +705,18 @@ else NO_PERF_READ_VDSOX32 := 1 endif +ifndef NO_LIBBABELTRACE + ifeq ($(feature-libbabeltrace), 0) + msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-devel/libbabeltrace-ctf-dev); + NO_LIBBABELTRACE := 1 + else + CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) + LDFLAGS += $(LIBBABELTRACE_LDFLAGS) + EXTLIBS += -lbabeltrace-ctf + $(call detected,CONFIG_LIBBABELTRACE) + endif +endif + # Among the variables below, these: # perfexecdir # template_dir @@ -815,3 +865,19 @@ endif ifeq ($(display_lib),1) $(info ) endif + +$(call detected_var,bindir_SQ) +$(call detected_var,PYTHON_WORD) +ifneq ($(OUTPUT),) +$(call detected_var,OUTPUT) +endif +$(call detected_var,htmldir_SQ) +$(call detected_var,infodir_SQ) +$(call detected_var,mandir_SQ) +$(call detected_var,ETC_PERFCONFIG_SQ) +$(call detected_var,prefix_SQ) +$(call detected_var,perfexecdir_SQ) +$(call detected_var,LIBDIR) +$(call detected_var,GTK_CFLAGS) +$(call detected_var,PERL_EMBED_CCOPTS) +$(call detected_var,PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index b32ff3372514..70c9aebe9da3 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -29,6 +29,7 @@ FILES= \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ + test-libbabeltrace.bin \ test-compile-32.bin \ test-compile-x32.bin \ test-zlib.bin @@ -43,7 +44,7 @@ BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) ############################### test-all.bin: - $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz + $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -lbabeltrace test-hello.bin: $(BUILD) @@ -133,7 +134,10 @@ test-timerfd.bin: $(BUILD) test-libdw-dwarf-unwind.bin: - $(BUILD) + $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) + +test-libbabeltrace.bin: + $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) test-sync-compare-and-swap.bin: $(BUILD) -Werror diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 6d4d09323922..1ffc3da5ca10 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -101,6 +101,10 @@ # include "test-pthread_attr_setaffinity_np.c" #undef main +#define main main_test_libbabeltrace +# include "test-libbabeltrace.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -126,6 +130,7 @@ int main(int argc, char *argv[]) main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); + main_test_libbabeltrace(); return 0; } diff --git a/tools/perf/config/feature-checks/test-libbabeltrace.c b/tools/perf/config/feature-checks/test-libbabeltrace.c new file mode 100644 index 000000000000..3b7dd68a4d52 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libbabeltrace.c @@ -0,0 +1,8 @@ + +#include <babeltrace/ctf-writer/writer.h> + +int main(void) +{ + bt_ctf_stream_class_get_packet_context_type((void *) 0); + return 0; +} diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 33569847fdcc..c2595e9bd69b 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -110,13 +110,11 @@ __perf_main () # List perf subcommands or long options if [ $cword -eq 1 ]; then if [[ $cur == --* ]]; then - __perfcomp '--help --version \ - --exec-path --html-path --paginate --no-pager \ - --perf-dir --work-tree --debugfs-dir' -- "$cur" + cmds=$($cmd --list-opts) else cmds=$($cmd --list-cmds) - __perfcomp "$cmds" "$cur" fi + __perfcomp "$cmds" "$cur" # List possible events for -e option elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3700a7faca6c..b857fcbd00cf 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -13,6 +13,7 @@ #include "util/quote.h" #include "util/run-command.h" #include "util/parse-events.h" +#include "util/parse-options.h" #include "util/debug.h" #include <api/fs/debugfs.h> #include <pthread.h> @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { #endif { "inject", cmd_inject, 0 }, { "mem", cmd_mem, 0 }, + { "data", cmd_data, 0 }, }; struct pager_config { @@ -124,6 +126,23 @@ static void commit_pager_choice(void) } } +struct option options[] = { + OPT_ARGUMENT("help", "help"), + OPT_ARGUMENT("version", "version"), + OPT_ARGUMENT("exec-path", "exec-path"), + OPT_ARGUMENT("html-path", "html-path"), + OPT_ARGUMENT("paginate", "paginate"), + OPT_ARGUMENT("no-pager", "no-pager"), + OPT_ARGUMENT("perf-dir", "perf-dir"), + OPT_ARGUMENT("work-tree", "work-tree"), + OPT_ARGUMENT("debugfs-dir", "debugfs-dir"), + OPT_ARGUMENT("buildid-dir", "buildid-dir"), + OPT_ARGUMENT("list-cmds", "list-cmds"), + OPT_ARGUMENT("list-opts", "list-opts"), + OPT_ARGUMENT("debug", "debug"), + OPT_END() +}; + static int handle_options(const char ***argv, int *argc, int *envchanged) { int handled = 0; @@ -222,6 +241,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) struct cmd_struct *p = commands+i; printf("%s ", p->cmd); } + putchar('\n'); + exit(0); + } else if (!strcmp(cmd, "--list-opts")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(options)-1; i++) { + struct option *p = options+i; + printf("--%s ", p->long_name); + } + putchar('\n'); exit(0); } else if (!strcmp(cmd, "--debug")) { if (*argc < 2) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1dabb8553499..1caa70a4a9e1 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -53,6 +53,7 @@ struct record_opts { bool sample_time; bool period; bool sample_intr_regs; + bool running_time; unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build new file mode 100644 index 000000000000..41efd7e368b3 --- /dev/null +++ b/tools/perf/scripts/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build new file mode 100644 index 000000000000..928e110179cb --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build new file mode 100644 index 000000000000..aefc15c9444a --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build new file mode 100644 index 000000000000..2de01a4b4084 --- /dev/null +++ b/tools/perf/tests/Build @@ -0,0 +1,42 @@ +perf-y += builtin-test.o +perf-y += parse-events.o +perf-y += dso-data.o +perf-y += attr.o +perf-y += vmlinux-kallsyms.o +perf-y += open-syscall.o +perf-y += open-syscall-all-cpus.o +perf-y += open-syscall-tp-fields.o +perf-y += mmap-basic.o +perf-y += perf-record.o +perf-y += rdpmc.o +perf-y += evsel-roundtrip-name.o +perf-y += evsel-tp-sched.o +perf-y += fdarray.o +perf-y += pmu.o +perf-y += hists_common.o +perf-y += hists_link.o +perf-y += hists_filter.o +perf-y += hists_output.o +perf-y += hists_cumulate.o +perf-y += python-use.o +perf-y += bp_signal.o +perf-y += bp_signal_overflow.o +perf-y += task-exit.o +perf-y += sw-clock.o +perf-y += mmap-thread-lookup.o +perf-y += thread-mg-share.o +perf-y += switch-tracking.o +perf-y += keep-tracking.o +perf-y += code-reading.o +perf-y += sample-parsing.o +perf-y += parse-no-sample-id-all.o + +perf-$(CONFIG_X86) += perf-time-to-tsc.o + +ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +endif + +CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index caaf37f079b1..513e5febbe5a 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -112,6 +112,9 @@ int test__dso_data(void) dso = dso__new((const char *)file); + TEST_ASSERT_VAL("Failed to access to dso", + dso__data_fd(dso, &machine) >= 0); + /* Basic 10 bytes tests. */ for (i = 0; i < ARRAY_SIZE(offsets); i++) { struct test_data_offset *data = &offsets[i]; @@ -243,8 +246,8 @@ int test__dso_data_cache(void) limit = nr * 4; TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit)); - /* and this is now our dso open FDs limit + 1 extra */ - dso_cnt = limit / 2 + 1; + /* and this is now our dso open FDs limit */ + dso_cnt = limit / 2; TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(dso_cnt, TEST_FILE_SIZE)); @@ -252,13 +255,13 @@ int test__dso_data_cache(void) struct dso *dso = dsos[i]; /* - * Open dsos via dso__data_fd or dso__data_read_offset. - * Both opens the data file and keep it open. + * Open dsos via dso__data_fd(), it opens the data + * file and keep it open (unless open file limit). */ + fd = dso__data_fd(dso, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + if (i % 2) { - fd = dso__data_fd(dso, &machine); - TEST_ASSERT_VAL("failed to get fd", fd > 0); - } else { #define BUFSIZE 10 u8 buf[BUFSIZE]; ssize_t n; @@ -268,7 +271,10 @@ int test__dso_data_cache(void) } } - /* open +1 dso over the allowed limit */ + /* verify the first one is already open */ + TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1); + + /* open +1 dso to reach the allowed limit */ fd = dso__data_fd(dsos[i], &machine); TEST_ASSERT_VAL("failed to get fd", fd > 0); diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c index 8fa82d1700c7..3ec885c48f8f 100644 --- a/tools/perf/tests/open-syscall-all-cpus.c +++ b/tools/perf/tests/open-syscall-all-cpus.c @@ -29,7 +29,12 @@ int test__open_syscall_event_on_all_cpus(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c index a33b2daae40f..07aa319bf334 100644 --- a/tools/perf/tests/open-syscall.c +++ b/tools/perf/tests/open-syscall.c @@ -18,7 +18,12 @@ int test__open_syscall_event(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 1cdab0ce00e2..ac243ebcb20a 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,6 +3,7 @@ #include "evsel.h" #include "evlist.h" #include <api/fs/fs.h> +#include <api/fs/tracefs.h> #include <api/fs/debugfs.h> #include "tests.h" #include "debug.h" @@ -1192,11 +1193,19 @@ static int count_tracepoints(void) { char events_path[PATH_MAX]; struct dirent *events_ent; + const char *mountpoint; DIR *events_dir; int cnt = 0; - scnprintf(events_path, PATH_MAX, "%s/tracing/events", - debugfs_find_mountpoint()); + mountpoint = tracefs_find_mountpoint(); + if (mountpoint) { + scnprintf(events_path, PATH_MAX, "%s/events", + mountpoint); + } else { + mountpoint = debugfs_find_mountpoint(); + scnprintf(events_path, PATH_MAX, "%s/tracing/events", + mountpoint); + } events_dir = opendir(events_path); diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build new file mode 100644 index 000000000000..0a73538c0441 --- /dev/null +++ b/tools/perf/ui/Build @@ -0,0 +1,14 @@ +libperf-y += setup.o +libperf-y += helpline.o +libperf-y += progress.o +libperf-y += util.o +libperf-y += hist.o +libperf-y += stdio/hist.o + +CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" + +libperf-$(CONFIG_SLANG) += browser.o +libperf-$(CONFIG_SLANG) += browsers/ +libperf-$(CONFIG_SLANG) += tui/ + +CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build new file mode 100644 index 000000000000..de223f5bed58 --- /dev/null +++ b/tools/perf/ui/browsers/Build @@ -0,0 +1,10 @@ +libperf-y += annotate.o +libperf-y += hists.o +libperf-y += map.o +libperf-y += scripts.o +libperf-y += header.o + +CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST +CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST +CFLAGS_map.o += -DENABLE_SLFUTURE_CONST +CFLAGS_scripts.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build new file mode 100644 index 000000000000..ec22e899a224 --- /dev/null +++ b/tools/perf/ui/gtk/Build @@ -0,0 +1,9 @@ +CFLAGS_gtk += -fPIC $(GTK_CFLAGS) + +gtk-y += browser.o +gtk-y += hists.o +gtk-y += setup.o +gtk-y += util.o +gtk-y += helpline.o +gtk-y += progress.o +gtk-y += annotate.o diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build new file mode 100644 index 000000000000..9e4c6ca41a9f --- /dev/null +++ b/tools/perf/ui/tui/Build @@ -0,0 +1,4 @@ +libperf-y += setup.o +libperf-y += util.o +libperf-y += helpline.o +libperf-y += progress.o diff --git a/tools/perf/util/Build b/tools/perf/util/Build new file mode 100644 index 000000000000..a2c8047d25f7 --- /dev/null +++ b/tools/perf/util/Build @@ -0,0 +1,144 @@ +libperf-y += abspath.o +libperf-y += alias.o +libperf-y += annotate.o +libperf-y += build-id.o +libperf-y += config.o +libperf-y += ctype.o +libperf-y += db-export.o +libperf-y += environment.o +libperf-y += event.o +libperf-y += evlist.o +libperf-y += evsel.o +libperf-y += exec_cmd.o +libperf-y += find_next_bit.o +libperf-y += help.o +libperf-y += kallsyms.o +libperf-y += levenshtein.o +libperf-y += parse-options.o +libperf-y += parse-events.o +libperf-y += path.o +libperf-y += rbtree.o +libperf-y += bitmap.o +libperf-y += hweight.o +libperf-y += run-command.o +libperf-y += quote.o +libperf-y += strbuf.o +libperf-y += string.o +libperf-y += strlist.o +libperf-y += strfilter.o +libperf-y += top.o +libperf-y += usage.o +libperf-y += wrapper.o +libperf-y += sigchain.o +libperf-y += dso.o +libperf-y += symbol.o +libperf-y += color.o +libperf-y += pager.o +libperf-y += header.o +libperf-y += callchain.o +libperf-y += values.o +libperf-y += debug.o +libperf-y += machine.o +libperf-y += map.o +libperf-y += pstack.o +libperf-y += session.o +libperf-y += ordered-events.o +libperf-y += comm.o +libperf-y += thread.o +libperf-y += thread_map.o +libperf-y += trace-event-parse.o +libperf-y += parse-events-flex.o +libperf-y += parse-events-bison.o +libperf-y += pmu.o +libperf-y += pmu-flex.o +libperf-y += pmu-bison.o +libperf-y += trace-event-read.o +libperf-y += trace-event-info.o +libperf-y += trace-event-scripting.o +libperf-y += trace-event.o +libperf-y += svghelper.o +libperf-y += sort.o +libperf-y += hist.o +libperf-y += util.o +libperf-y += xyarray.o +libperf-y += cpumap.o +libperf-y += cgroup.o +libperf-y += target.o +libperf-y += rblist.o +libperf-y += intlist.o +libperf-y += vdso.o +libperf-y += stat.o +libperf-y += record.o +libperf-y += srcline.o +libperf-y += data.o +libperf-y += tsc.o +libperf-y += cloexec.o +libperf-y += thread-stack.o + +libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-event.o + +ifndef CONFIG_LIBELF +libperf-y += symbol-minimal.o +endif + +libperf-$(CONFIG_DWARF) += probe-finder.o +libperf-$(CONFIG_DWARF) += dwarf-aux.o + +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o + +libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o + +libperf-y += scripting-engines/ + +libperf-$(CONFIG_PERF_REGS) += perf_regs.o +libperf-$(CONFIG_ZLIB) += zlib.o + +CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" + +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c + $(call rule_mkdir) + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l + +$(OUTPUT)util/parse-events-bison.c: util/parse-events.y + $(call rule_mkdir) + @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ + +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c + $(call rule_mkdir) + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l + +$(OUTPUT)util/pmu-bison.c: util/pmu.y + $(call rule_mkdir) + @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ + +CFLAGS_parse-events-flex.o += -w +CFLAGS_pmu-flex.o += -w +CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w +CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w + +$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c + +CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_parse-events.o += -Wno-redundant-decls + +$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0c72680a977f..ffdc338df925 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -93,6 +93,35 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +/* asnprintf consolidates asprintf and snprintf */ +static int asnprintf(char **strp, size_t size, const char *fmt, ...) +{ + va_list ap; + int ret; + + if (!strp) + return -EINVAL; + + va_start(ap, fmt); + if (*strp) + ret = vsnprintf(*strp, size, fmt, ap); + else + ret = vasprintf(strp, fmt, ap); + va_end(ap); + + return ret; +} + +static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +{ + char *tmp = bf; + int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, + sbuild_id, sbuild_id + 2); + if (ret < 0 || (tmp && size < (unsigned int)ret)) + return NULL; + return bf; +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; @@ -101,14 +130,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return NULL; build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - if (bf == NULL) { - if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2) < 0) - return NULL; - } else - snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2); - return bf; + return build_id__filename(build_id_hex, bf, size); } #define dsos__for_each_with_build_id(pos, head) \ @@ -259,35 +281,93 @@ void disable_buildid_cache(void) no_buildid_cache = true; } -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms, bool is_vdso) +static char *build_id_cache__dirname_from_path(const char *name, + bool is_kallsyms, bool is_vdso) { - const size_t size = PATH_MAX; - char *realname, *filename = zalloc(size), - *linkname = zalloc(size), *targetname; - int len, err = -1; + char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; - if (is_kallsyms) { - if (symbol_conf.kptr_restrict) { - pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); - err = 0; - goto out_free; - } - realname = (char *) name; - } else + if (!slash) { + realname = realpath(name, NULL); + if (!realname) + return NULL; + } + + if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? DSO__NAME_VDSO : realname) < 0) + filename = NULL; + + if (!slash) + free(realname); + + return filename; +} + +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result) +{ + struct strlist *list; + char *dir_name; + DIR *dir; + struct dirent *d; + int ret = 0; + + list = strlist__new(true, NULL); + dir_name = build_id_cache__dirname_from_path(pathname, false, false); + if (!list || !dir_name) { + ret = -ENOMEM; + goto out; + } + + /* List up all dirents */ + dir = opendir(dir_name); + if (!dir) { + ret = -errno; + goto out; + } + + while ((d = readdir(dir)) != NULL) { + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + strlist__add(list, d->d_name); + } + closedir(dir); + +out: + free(dir_name); + if (ret) + strlist__delete(list); + else + *result = list; + + return ret; +} + +int build_id_cache__add_s(const char *sbuild_id, const char *name, + bool is_kallsyms, bool is_vdso) +{ + const size_t size = PATH_MAX; + char *realname = NULL, *filename = NULL, *dir_name = NULL, + *linkname = zalloc(size), *targetname, *tmp; + int err = -1; + + if (!is_kallsyms) { realname = realpath(name, NULL); + if (!realname) + goto out_free; + } - if (realname == NULL || filename == NULL || linkname == NULL) + dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + if (!dir_name) goto out_free; - len = scnprintf(filename, size, "%s%s%s", - debugdir, slash ? "/" : "", - is_vdso ? DSO__NAME_VDSO : realname); - if (mkdir_p(filename, 0755)) + if (mkdir_p(dir_name, 0755)) goto out_free; - snprintf(filename + len, size - len, "/%s", sbuild_id); + if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + filename = NULL; + goto out_free; + } if (access(filename, F_OK)) { if (is_kallsyms) { @@ -297,14 +377,16 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, goto out_free; } - len = scnprintf(linkname, size, "%s/.build-id/%.2s", - debugdir, sbuild_id); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; + tmp = strrchr(linkname, '/'); + *tmp = '\0'; if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) goto out_free; - snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); - targetname = filename + strlen(debugdir) - 5; + *tmp = '/'; + targetname = filename + strlen(buildid_dir) - 5; memcpy(targetname, "../..", 5); if (symlink(targetname, linkname) == 0) @@ -313,34 +395,46 @@ out_free: if (!is_kallsyms) free(realname); free(filename); + free(dir_name); free(linkname); return err; } static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, - const char *name, const char *debugdir, - bool is_kallsyms, bool is_vdso) + const char *name, bool is_kallsyms, + bool is_vdso) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, debugdir, name, - is_kallsyms, is_vdso); + return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso); +} + +bool build_id_cache__cached(const char *sbuild_id) +{ + bool ret = false; + char *filename = build_id__filename(sbuild_id, NULL, 0); + + if (filename && !access(filename, F_OK)) + ret = true; + free(filename); + + return ret; } -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) +int build_id_cache__remove_s(const char *sbuild_id) { const size_t size = PATH_MAX; char *filename = zalloc(size), - *linkname = zalloc(size); + *linkname = zalloc(size), *tmp; int err = -1; if (filename == NULL || linkname == NULL) goto out_free; - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, sbuild_id + 2); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; if (access(linkname, F_OK)) goto out_free; @@ -354,8 +448,8 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) /* * Since the link is relative, we must make it absolute: */ - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, filename); + tmp = strrchr(linkname, '/') + 1; + snprintf(tmp, size - (tmp - linkname), "%s", filename); if (unlink(linkname)) goto out_free; @@ -367,8 +461,7 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, struct machine *machine, - const char *debugdir) +static int dso__cache_build_id(struct dso *dso, struct machine *machine) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; bool is_vdso = dso__is_vdso(dso); @@ -381,28 +474,26 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, name = nm; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - debugdir, is_kallsyms, is_vdso); + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine, const char *debugdir) + struct machine *machine) { struct dso *pos; int err = 0; dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, machine, debugdir)) + if (dso__cache_build_id(pos, machine)) err = -1; return err; } -static int machine__cache_build_ids(struct machine *machine, const char *debugdir) +static int machine__cache_build_ids(struct machine *machine) { - int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, - debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, - debugdir); + int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine); + ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine); return ret; } @@ -417,11 +508,11 @@ int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host, buildid_dir); + ret = machine__cache_build_ids(&session->machines.host); for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, buildid_dir); + ret |= machine__cache_build_ids(pos); } return ret ? -1 : 0; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 8236319514d5..85011222cc14 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -4,6 +4,7 @@ #define BUILD_ID_SIZE 20 #include "tool.h" +#include "strlist.h" #include <linux/types.h> extern struct perf_tool build_id__mark_dso_hit_ops; @@ -22,9 +23,12 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result); +bool build_id_cache__cached(const char *sbuild_id); +int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); +int build_id_cache__remove_s(const char *sbuild_id); void disable_buildid_cache(void); #endif diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index d04d770d90f6..fbcca21d66ab 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -17,6 +17,7 @@ #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 14e7a123d43b..9f643ee77001 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg) callchain_param.dump_size = size; } #endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + callchain_param.record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; } else { pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c0ec1acc38e4..6033a0a212ca 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,6 +11,7 @@ enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, CALLCHAIN_DWARF, + CALLCHAIN_LBR, CALLCHAIN_MAX }; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c new file mode 100644 index 000000000000..e372e03ff480 --- /dev/null +++ b/tools/perf/util/data-convert-bt.c @@ -0,0 +1,614 @@ +/* + * CTF writing support via babeltrace. + * + * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com> + * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de> + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include <linux/compiler.h> +#include <babeltrace/ctf-writer/writer.h> +#include <babeltrace/ctf-writer/clock.h> +#include <babeltrace/ctf-writer/stream.h> +#include <babeltrace/ctf-writer/event.h> +#include <babeltrace/ctf-writer/event-types.h> +#include <babeltrace/ctf-writer/event-fields.h> +#include <babeltrace/ctf/events.h> +#include <traceevent/event-parse.h> +#include "asm/bug.h" +#include "data-convert-bt.h" +#include "session.h" +#include "util.h" +#include "debug.h" +#include "tool.h" +#include "evlist.h" +#include "evsel.h" +#include "machine.h" + +#define pr_N(n, fmt, ...) \ + eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) + +#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__) +#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__) + +#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__) + +struct evsel_priv { + struct bt_ctf_event_class *event_class; +}; + +struct ctf_writer { + /* writer primitives */ + struct bt_ctf_writer *writer; + struct bt_ctf_stream *stream; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_clock *clock; + + /* data types */ + union { + struct { + struct bt_ctf_field_type *s64; + struct bt_ctf_field_type *u64; + struct bt_ctf_field_type *s32; + struct bt_ctf_field_type *u32; + struct bt_ctf_field_type *string; + struct bt_ctf_field_type *u64_hex; + }; + struct bt_ctf_field_type *array[6]; + } data; +}; + +struct convert { + struct perf_tool tool; + struct ctf_writer writer; + + u64 events_size; + u64 events_count; +}; + +static int value_set(struct bt_ctf_field_type *type, + struct bt_ctf_event *event, + const char *name, u64 val) +{ + struct bt_ctf_field *field; + bool sign = bt_ctf_field_type_integer_get_signed(type); + int ret; + + field = bt_ctf_field_create(type); + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (sign) { + ret = bt_ctf_field_signed_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } else { + ret = bt_ctf_field_unsigned_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } + + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err; + } + + pr2(" SET [%s = %" PRIu64 "]\n", name, val); + +err: + bt_ctf_field_put(field); + return ret; +} + +#define __FUNC_VALUE_SET(_name, _val_type) \ +static __maybe_unused int value_set_##_name(struct ctf_writer *cw, \ + struct bt_ctf_event *event, \ + const char *name, \ + _val_type val) \ +{ \ + struct bt_ctf_field_type *type = cw->data._name; \ + return value_set(type, event, name, (u64) val); \ +} + +#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name) + +FUNC_VALUE_SET(s32) +FUNC_VALUE_SET(u32) +FUNC_VALUE_SET(s64) +FUNC_VALUE_SET(u64) +__FUNC_VALUE_SET(u64_hex, u64) + +static int add_generic_values(struct ctf_writer *cw, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 type = evsel->attr.sample_type; + int ret; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + + if (type & PERF_SAMPLE_IP) { + ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TID) { + ret = value_set_s32(cw, event, "perf_tid", sample->tid); + if (ret) + return -1; + + ret = value_set_s32(cw, event, "perf_pid", sample->pid); + if (ret) + return -1; + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) { + ret = value_set_u64(cw, event, "perf_id", sample->id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_CPU) { + ret = value_set_u32(cw, event, "perf_cpu", sample->cpu); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_PERIOD) { + ret = value_set_u64(cw, event, "perf_period", sample->period); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_WEIGHT) { + ret = value_set_u64(cw, event, "perf_weight", sample->weight); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_DATA_SRC) { + ret = value_set_u64(cw, event, "perf_data_src", + sample->data_src); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TRANSACTION) { + ret = value_set_u64(cw, event, "perf_transaction", + sample->transaction); + if (ret) + return -1; + } + + return 0; +} + +static int process_sample_event(struct perf_tool *tool, + union perf_event *_event __maybe_unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct convert *c = container_of(tool, struct convert, tool); + struct evsel_priv *priv = evsel->priv; + struct ctf_writer *cw = &c->writer; + struct bt_ctf_event_class *event_class; + struct bt_ctf_event *event; + int ret; + + if (WARN_ONCE(!priv, "Failed to setup all events.\n")) + return 0; + + event_class = priv->event_class; + + /* update stats */ + c->events_count++; + c->events_size += _event->header.size; + + pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count); + + event = bt_ctf_event_create(event_class); + if (!event) { + pr_err("Failed to create an CTF event\n"); + return -1; + } + + bt_ctf_clock_set_time(cw->clock, sample->time); + + ret = add_generic_values(cw, event, evsel, sample); + if (ret) + return -1; + + bt_ctf_stream_append_event(cw->stream, event); + bt_ctf_event_put(event); + return 0; +} + +static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, + struct bt_ctf_event_class *event_class) +{ + u64 type = evsel->attr.sample_type; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + +#define ADD_FIELD(cl, t, n) \ + do { \ + pr2(" field '%s'\n", n); \ + if (bt_ctf_event_class_add_field(cl, t, n)) { \ + pr_err("Failed to add field '%s;\n", n); \ + return -1; \ + } \ + } while (0) + + if (type & PERF_SAMPLE_IP) + ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip"); + + if (type & PERF_SAMPLE_TID) { + ADD_FIELD(event_class, cw->data.s32, "perf_tid"); + ADD_FIELD(event_class, cw->data.s32, "perf_pid"); + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) + ADD_FIELD(event_class, cw->data.u64, "perf_id"); + + if (type & PERF_SAMPLE_STREAM_ID) + ADD_FIELD(event_class, cw->data.u64, "perf_stream_id"); + + if (type & PERF_SAMPLE_CPU) + ADD_FIELD(event_class, cw->data.u32, "perf_cpu"); + + if (type & PERF_SAMPLE_PERIOD) + ADD_FIELD(event_class, cw->data.u64, "perf_period"); + + if (type & PERF_SAMPLE_WEIGHT) + ADD_FIELD(event_class, cw->data.u64, "perf_weight"); + + if (type & PERF_SAMPLE_DATA_SRC) + ADD_FIELD(event_class, cw->data.u64, "perf_data_src"); + + if (type & PERF_SAMPLE_TRANSACTION) + ADD_FIELD(event_class, cw->data.u64, "perf_transaction"); + +#undef ADD_FIELD + return 0; +} + +static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) +{ + struct bt_ctf_event_class *event_class; + struct evsel_priv *priv; + const char *name = perf_evsel__name(evsel); + int ret; + + pr("Adding event '%s' (type %d)\n", name, evsel->attr.type); + + event_class = bt_ctf_event_class_create(name); + if (!event_class) + return -1; + + ret = add_generic_types(cw, evsel, event_class); + if (ret) + goto err; + + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); + if (ret) { + pr("Failed to add event class into stream.\n"); + goto err; + } + + priv = malloc(sizeof(*priv)); + if (!priv) + goto err; + + priv->event_class = event_class; + evsel->priv = priv; + return 0; + +err: + bt_ctf_event_class_put(event_class); + pr_err("Failed to add event '%s'.\n", name); + return -1; +} + +static int setup_events(struct ctf_writer *cw, struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + int ret; + + evlist__for_each(evlist, evsel) { + ret = add_event(cw, evsel); + if (ret) + return ret; + } + return 0; +} + +static int ctf_writer__setup_env(struct ctf_writer *cw, + struct perf_session *session) +{ + struct perf_header *header = &session->header; + struct bt_ctf_writer *writer = cw->writer; + +#define ADD(__n, __v) \ +do { \ + if (bt_ctf_writer_add_environment_field(writer, __n, __v)) \ + return -1; \ +} while (0) + + ADD("host", header->env.hostname); + ADD("sysname", "Linux"); + ADD("release", header->env.os_release); + ADD("version", header->env.version); + ADD("machine", header->env.arch); + ADD("domain", "kernel"); + ADD("tracer_name", "perf"); + +#undef ADD + return 0; +} + +static int ctf_writer__setup_clock(struct ctf_writer *cw) +{ + struct bt_ctf_clock *clock = cw->clock; + + bt_ctf_clock_set_description(clock, "perf clock"); + +#define SET(__n, __v) \ +do { \ + if (bt_ctf_clock_set_##__n(clock, __v)) \ + return -1; \ +} while (0) + + SET(frequency, 1000000000); + SET(offset_s, 0); + SET(offset, 0); + SET(precision, 10); + SET(is_absolute, 0); + +#undef SET + return 0; +} + +static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) +{ + struct bt_ctf_field_type *type; + + type = bt_ctf_field_type_integer_create(size); + if (!type) + return NULL; + + if (sign && + bt_ctf_field_type_integer_set_signed(type, 1)) + goto err; + + if (hex && + bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) + goto err; + + pr2("Created type: INTEGER %d-bit %ssigned %s\n", + size, sign ? "un" : "", hex ? "hex" : ""); + return type; + +err: + bt_ctf_field_type_put(type); + return NULL; +} + +static void ctf_writer__cleanup_data(struct ctf_writer *cw) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cw->data.array); i++) + bt_ctf_field_type_put(cw->data.array[i]); +} + +static int ctf_writer__init_data(struct ctf_writer *cw) +{ +#define CREATE_INT_TYPE(type, size, sign, hex) \ +do { \ + (type) = create_int_type(size, sign, hex); \ + if (!(type)) \ + goto err; \ +} while (0) + + CREATE_INT_TYPE(cw->data.s64, 64, true, false); + CREATE_INT_TYPE(cw->data.u64, 64, false, false); + CREATE_INT_TYPE(cw->data.s32, 32, true, false); + CREATE_INT_TYPE(cw->data.u32, 32, false, false); + CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true); + + cw->data.string = bt_ctf_field_type_string_create(); + if (cw->data.string) + return 0; + +err: + ctf_writer__cleanup_data(cw); + pr_err("Failed to create data types.\n"); + return -1; +} + +static void ctf_writer__cleanup(struct ctf_writer *cw) +{ + ctf_writer__cleanup_data(cw); + + bt_ctf_clock_put(cw->clock); + bt_ctf_stream_put(cw->stream); + bt_ctf_stream_class_put(cw->stream_class); + bt_ctf_writer_put(cw->writer); + + /* and NULL all the pointers */ + memset(cw, 0, sizeof(*cw)); +} + +static int ctf_writer__init(struct ctf_writer *cw, const char *path) +{ + struct bt_ctf_writer *writer; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_stream *stream; + struct bt_ctf_clock *clock; + + /* CTF writer */ + writer = bt_ctf_writer_create(path); + if (!writer) + goto err; + + cw->writer = writer; + + /* CTF clock */ + clock = bt_ctf_clock_create("perf_clock"); + if (!clock) { + pr("Failed to create CTF clock.\n"); + goto err_cleanup; + } + + cw->clock = clock; + + if (ctf_writer__setup_clock(cw)) { + pr("Failed to setup CTF clock.\n"); + goto err_cleanup; + } + + /* CTF stream class */ + stream_class = bt_ctf_stream_class_create("perf_stream"); + if (!stream_class) { + pr("Failed to create CTF stream class.\n"); + goto err_cleanup; + } + + cw->stream_class = stream_class; + + /* CTF clock stream setup */ + if (bt_ctf_stream_class_set_clock(stream_class, clock)) { + pr("Failed to assign CTF clock to stream class.\n"); + goto err_cleanup; + } + + if (ctf_writer__init_data(cw)) + goto err_cleanup; + + /* CTF stream instance */ + stream = bt_ctf_writer_create_stream(writer, stream_class); + if (!stream) { + pr("Failed to create CTF stream.\n"); + goto err_cleanup; + } + + cw->stream = stream; + + /* CTF clock writer setup */ + if (bt_ctf_writer_add_clock(writer, clock)) { + pr("Failed to assign CTF clock to writer.\n"); + goto err_cleanup; + } + + return 0; + +err_cleanup: + ctf_writer__cleanup(cw); +err: + pr_err("Failed to setup CTF writer.\n"); + return -1; +} + +int bt_convert__perf2ctf(const char *input, const char *path) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = input, + .mode = PERF_DATA_MODE_READ, + }; + struct convert c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, + }; + struct ctf_writer *cw = &c.writer; + int err = -1; + + /* CTF writer */ + if (ctf_writer__init(cw, path)) + return -1; + + /* perf.data session */ + session = perf_session__new(&file, 0, NULL); + if (!session) + goto free_writer; + + /* CTF writer env/clock setup */ + if (ctf_writer__setup_env(cw, session)) + goto free_session; + + /* CTF events setup */ + if (setup_events(cw, session)) + goto free_session; + + err = perf_session__process_events(session, &c.tool); + if (!err) + err = bt_ctf_stream_flush(cw->stream); + + fprintf(stderr, + "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", + file.path, path); + + fprintf(stderr, + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + (double) c.events_size / 1024.0 / 1024.0, + c.events_count); + + /* its all good */ +free_session: + perf_session__delete(session); + +free_writer: + ctf_writer__cleanup(cw); + return err; +} diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h new file mode 100644 index 000000000000..dda30c5d0792 --- /dev/null +++ b/tools/perf/util/data-convert-bt.h @@ -0,0 +1,8 @@ +#ifndef __DATA_CONVERT_BT_H +#define __DATA_CONVERT_BT_H +#ifdef HAVE_LIBBABELTRACE_SUPPORT + +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf); + +#endif /* HAVE_LIBBABELTRACE_SUPPORT */ +#endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ad60b2f20258..2da5581ec74d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -20,6 +20,7 @@ int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; +int debug_data_convert; static int _eprintf(int level, int var, const char *fmt, va_list args) { @@ -147,6 +148,7 @@ static struct debug_variable { { .name = "verbose", .ptr = &verbose }, { .name = "ordered-events", .ptr = &debug_ordered_events}, { .name = "stderr", .ptr = &redirect_to_stderr}, + { .name = "data-convert", .ptr = &debug_data_convert }, { .name = NULL, } }; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index be264d6f3b30..caac2fdc6105 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,6 +12,7 @@ extern int verbose; extern bool quiet, dump_trace; extern int debug_ordered_events; +extern int debug_data_convert; #ifndef pr_fmt #define pr_fmt(fmt) fmt diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index c2f7d3b90966..814554d1b857 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -45,13 +45,13 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__DEBUGLINK: { char *debuglink; - strncpy(filename, dso->long_name, size); - debuglink = filename + dso->long_name_len; + len = __symbol__join_symfs(filename, size, dso->long_name); + debuglink = filename + len; while (debuglink != filename && *debuglink != '/') debuglink--; if (*debuglink == '/') debuglink++; - ret = filename__read_debuglink(dso->long_name, debuglink, + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } break; @@ -240,7 +240,7 @@ static int do_open(char *name) if (fd >= 0) return fd; - pr_debug("dso open failed, mmap: %s\n", + pr_debug("dso open failed: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); if (!dso__data_open_cnt || errno != EMFILE) break; diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index cc66c4049e09..780b2bc11128 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -278,6 +278,21 @@ bool die_is_func_def(Dwarf_Die *dw_die) } /** + * die_is_func_instance - Ensure that this DIE is an instance of a subprogram + * @dw_die: a DIE + * + * Ensure that this DIE is an instance (which has an entry address). + * This returns true if @dw_die is a function instance. If not, you need to + * call die_walk_instances() to find actual instances. + **/ +bool die_is_func_instance(Dwarf_Die *dw_die) +{ + Dwarf_Addr tmp; + + /* Actually gcc optimizes non-inline as like as inlined */ + return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0; +} +/** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure * @offs: The offset of the member in the data structure diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index b4fe90c6cb2d..af7dbcd5f929 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -41,6 +41,9 @@ extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, /* Ensure that this DIE is a subprogram and definition (not declaration) */ extern bool die_is_func_def(Dwarf_Die *dw_die); +/* Ensure that this DIE is an instance of a subprogram */ +extern bool die_is_func_instance(Dwarf_Die *dw_die); + /* Compare diename and tname */ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6c6d044e959a..9e806d855b04 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -615,7 +615,7 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) else s = ""; - return fprintf(fp, "%s: %s:%d\n", s, event->comm.comm, event->comm.tid); + return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } int perf_event__process_comm(struct perf_tool *tool __maybe_unused, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 28b8ce86bf12..8d0b62361129 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -7,7 +7,6 @@ * Released under the GPL v2. (and only v2, not any later version) */ #include "util.h" -#include <api/fs/debugfs.h> #include <api/fs/fs.h> #include <poll.h> #include "cpumap.h" @@ -1086,6 +1085,38 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) return err; } +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter; + int ret = -1; + size_t i; + + for (i = 0; i < npids; ++i) { + if (i == 0) { + if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) + return -1; + } else { + char *tmp; + + if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) + goto out_free; + + free(filter); + filter = tmp; + } + } + + ret = perf_evlist__set_filter(evlist, filter); +out_free: + free(filter); + return ret; +} + +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +{ + return perf_evlist__set_filter_pids(evlist, 1, &pid); +} + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { struct perf_evsel *pos; @@ -1329,7 +1360,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar * writing exactly one byte, in workload.cork_fd, usually via * perf_evlist__start_workload(). * - * For cancelling the workload without actuallin running it, + * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() * here. diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e99a67632831..f07c984465f0 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -51,6 +51,7 @@ struct perf_evlist { struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; + struct events_stats stats; }; struct perf_evsel_str_handler { @@ -77,6 +78,8 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ea51a90e20a0..bb4eff28869e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -537,13 +537,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) } static void -perf_evsel__config_callgraph(struct perf_evsel *evsel) +perf_evsel__config_callgraph(struct perf_evsel *evsel, + struct record_opts *opts) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); + if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (!opts->branch_stack) { + if (attr->exclude_user) { + pr_warning("LBR callstack option is only available " + "to get user callchain information. " + "Falling back to framepointers.\n"); + } else { + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK; + } + } else + pr_warning("Cannot use LBR callstack with branch stack. " + "Falling back to framepointers.\n"); + } + if (callchain_param.record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); @@ -667,7 +684,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel); + perf_evsel__config_callgraph(evsel, opts); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; @@ -717,6 +734,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); + if (opts->running_time) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + /* * XXX see the function comment above * diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 38622747d130..dcf202aebe9f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -355,4 +355,8 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) +static inline bool has_branch_callstack(struct perf_evsel *evsel) +{ + return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1bca3a9f2b16..9e0f60a7e7b3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1502,18 +1502,100 @@ static int remove_loops(struct branch_entry *l, int nr) return nr; } -static int thread__resolve_callchain_sample(struct thread *thread, - struct ip_callchain *chain, - struct branch_stack *branch, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +/* + * Recolve LBR callstack chain sample + * Return: + * 1 on success get LBR callchain information + * 0 no available LBR callchain information, should try fp + * negative error code on other errors. + */ +static int resolve_lbr_callchain_sample(struct thread *thread, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) { + struct ip_callchain *chain = sample->callchain; + int chain_nr = min(max_stack, (int)chain->nr); + int i, j, err; + u64 ip; + + for (i = 0; i < chain_nr; i++) { + if (chain->ips[i] == PERF_CONTEXT_USER) + break; + } + + /* LBR only affects the user callchain */ + if (i != chain_nr) { + struct branch_stack *lbr_stack = sample->branch_stack; + int lbr_nr = lbr_stack->nr; + /* + * LBR callstack can only get user call chain. + * The mix_chain_nr is kernel call chain + * number plus LBR user call chain number. + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER, + * lbr_nr + 1 is the user call chain number. + * For details, please refer to the comments + * in callchain__printf + */ + int mix_chain_nr = i + 1 + lbr_nr + 1; + + if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + + for (j = 0; j < mix_chain_nr; j++) { + if (callchain_param.order == ORDER_CALLEE) { + if (j < i + 1) + ip = chain->ips[j]; + else if (j > i + 1) + ip = lbr_stack->entries[j - i - 2].from; + else + ip = lbr_stack->entries[0].to; + } else { + if (j < lbr_nr) + ip = lbr_stack->entries[lbr_nr - j - 1].from; + else if (j > lbr_nr) + ip = chain->ips[i + 1 - (j - lbr_nr)]; + else + ip = lbr_stack->entries[0].to; + } + + err = add_callchain_ip(thread, parent, root_al, false, ip); + if (err) + return (err < 0) ? err : 0; + } + return 1; + } + + return 0; +} + +static int thread__resolve_callchain_sample(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + struct branch_stack *branch = sample->branch_stack; + struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr); int i, j, err; int skip_idx = -1; int first_call = 0; + callchain_cursor_reset(&callchain_cursor); + + if (has_branch_callstack(evsel)) { + err = resolve_lbr_callchain_sample(thread, sample, parent, + root_al, max_stack); + if (err) + return (err < 0) ? err : 0; + } + /* * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. @@ -1521,8 +1603,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain->nr < PERF_MAX_STACK_DEPTH) skip_idx = arch_skip_callchain_idx(thread, chain); - callchain_cursor_reset(&callchain_cursor); - /* * Add branches to call stack for easier browsing. This gives * more context for a sample than just the callers. @@ -1623,9 +1703,9 @@ int thread__resolve_callchain(struct thread *thread, struct addr_location *root_al, int max_stack) { - int ret = thread__resolve_callchain_sample(thread, sample->callchain, - sample->branch_stack, - parent, root_al, max_stack); + int ret = thread__resolve_callchain_sample(thread, evsel, + sample, parent, + root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index fd4be94125fb..077ddd25189f 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -166,7 +166,7 @@ static int __ordered_events__flush(struct perf_session *s, struct ui_progress prog; int ret; - if (!tool->ordered_events || !limit) + if (!limit) return 0; if (show_progress) @@ -216,6 +216,9 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, }; int err; + if (oe->nr_events == 0) + return 0; + switch (how) { case OE_FLUSH__FINAL: oe->next_flush = ULLONG_MAX; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7f8ec6ce2823..fe07573d5ed4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -20,11 +20,6 @@ #define MAX_NAME_LEN 100 -struct event_symbol { - const char *symbol; - const char *alias; -}; - #ifdef PARSER_DEBUG extern int parse_events_debug; #endif @@ -39,7 +34,7 @@ static struct perf_pmu_event_symbol *perf_pmu_events_list; */ static int perf_pmu_events_list_num; -static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { +struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { .symbol = "cpu-cycles", .alias = "cycles", @@ -82,7 +77,7 @@ static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { }, }; -static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { +struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { [PERF_COUNT_SW_CPU_CLOCK] = { .symbol = "cpu-clock", .alias = "", @@ -175,9 +170,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return NULL; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return NULL; @@ -473,12 +465,6 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event) { - int ret; - - ret = debugfs_valid_mountpoint(tracing_events_path); - if (ret) - return ret; - if (strpbrk(sys, "*?")) return add_tracepoint_multi_sys(list, idx, sys, event); else @@ -1098,6 +1084,14 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; +static int cmp_string(const void *a, const void *b) +{ + const char * const *as = a; + const char * const *bs = b; + + return strcmp(*as, *bs); +} + /* * Print the events from <debugfs_mount_point>/tracing/events */ @@ -1109,18 +1103,21 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - char sbuf[STRERR_BUFSIZE]; - - if (debugfs_valid_mountpoint(tracing_events_path)) { - printf(" [ Tracepoints not available: %s ]\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - return; - } + char **evt_list = NULL; + unsigned int evt_i = 0, evt_num = 0; + bool evt_num_known = false; +restart: sys_dir = opendir(tracing_events_path); if (!sys_dir) return; + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_close_sys_dir; + } + for_each_subsystem(sys_dir, sys_dirent, sys_next) { if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) @@ -1137,19 +1134,56 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, !strglobmatch(evt_dirent.d_name, event_glob)) continue; - if (name_only) { - printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + if (!evt_num_known) { + evt_num++; continue; } snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - printf(" %-50s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT]); + + evt_list[evt_i] = strdup(evt_path); + if (evt_list[evt_i] == NULL) + goto out_close_evt_dir; + evt_i++; } closedir(evt_dir); } closedir(sys_dir); + + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + } + if (evt_num) + printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_close_evt_dir: + closedir(evt_dir); +out_close_sys_dir: + closedir(sys_dir); + + printf("FATAL: not enough memory to print %s\n", + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + if (evt_list) + goto out_free; } /* @@ -1163,9 +1197,6 @@ int is_valid_tracepoint(const char *event_string) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return 0; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return 0; @@ -1233,38 +1264,19 @@ static bool is_event_supported(u8 type, unsigned config) return ret; } -static void __print_events_type(u8 type, struct event_symbol *syms, - unsigned max) -{ - char name[64]; - unsigned i; - - for (i = 0; i < max ; i++, syms++) { - if (!is_event_supported(type, i)) - continue; - - if (strlen(syms->alias)) - snprintf(name, sizeof(name), "%s OR %s", - syms->symbol, syms->alias); - else - snprintf(name, sizeof(name), "%s", syms->symbol); - - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - } -} - -void print_events_type(u8 type) -{ - if (type == PERF_TYPE_SOFTWARE) - __print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); - else - __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); -} - int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, printed = 0; + unsigned int type, op, i, evt_i = 0, evt_num = 0; char name[64]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + } for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { @@ -1282,27 +1294,66 @@ int print_hwcache_events(const char *event_glob, bool name_only) type | (op << 8) | (i << 16))) continue; - if (name_only) - printf("%s ", name); - else - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); - ++printed; + if (!evt_num_known) { + evt_num++; + continue; + } + + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } } } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_HW_CACHE]); + } + if (evt_num) printf("\n"); - return printed; + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return evt_num; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (evt_list) + goto out_free; + return evt_num; } -static void print_symbol_events(const char *event_glob, unsigned type, +void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) { - unsigned i, printed = 0; + unsigned int i, evt_i = 0, evt_num = 0; char name[MAX_NAME_LEN]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + syms -= max; + } for (i = 0; i < max; i++, syms++) { @@ -1314,23 +1365,49 @@ static void print_symbol_events(const char *event_glob, unsigned type, if (!is_event_supported(type, i)) continue; - if (name_only) { - printf("%s ", syms->symbol); + if (!evt_num_known) { + evt_num++; continue; } - if (strlen(syms->alias)) + if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else strncpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - - printed++; + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + } + if (evt_num) printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); + if (evt_list) + goto out_free; } /* @@ -1338,11 +1415,6 @@ static void print_symbol_events(const char *event_glob, unsigned type, */ void print_events(const char *event_glob, bool name_only) { - if (!name_only) { - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); - } - print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ff6e1fa4111e..52a2dda4f954 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -116,12 +116,21 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_error(void *data, void *scanner, char const *msg); void print_events(const char *event_glob, bool name_only); -void print_events_type(u8 type); + +struct event_symbol { + const char *symbol; + const char *alias; +}; +extern struct event_symbol event_symbols_hw[]; +extern struct event_symbol event_symbols_sw[]; +void print_symbol_events(const char *event_glob, unsigned type, + struct event_symbol *syms, unsigned max, + bool name_only); void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); -extern int valid_debugfs_mount(const char *debugfs); +int valid_event_mount(const char *eventfs); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 4a015f77e2b5..1457d6639b60 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -505,13 +505,18 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o break; case PARSE_OPT_LIST_OPTS: while (options->type != OPTION_END) { - printf("--%s ", options->long_name); + if (options->long_name) + printf("--%s ", options->long_name); options++; } + putchar('\n'); exit(130); case PARSE_OPT_LIST_SUBCMDS: - for (int i = 0; subcommands[i]; i++) - printf("%s ", subcommands[i]); + if (subcommands) { + for (int i = 0; subcommands[i]; i++) + printf("%s ", subcommands[i]); + } + putchar('\n'); exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 919937eb0be2..7c0e765fa2e3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,6 +41,7 @@ #include "symbol.h" #include "thread.h" #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" @@ -150,7 +151,7 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) sym = __find_kernel_function_by_name(name, &map); if (sym) return map->unmap_ip(map, sym->start) - - (reloc) ? 0 : map->reloc; + ((reloc) ? 0 : map->reloc); } return 0; } @@ -532,7 +533,7 @@ static int get_real_path(const char *raw_path, const char *comp_dir, else { if (access(raw_path, R_OK) == 0) { *new_path = strdup(raw_path); - return 0; + return *new_path ? 0 : -ENOMEM; } else return -errno; } @@ -548,9 +549,11 @@ static int get_real_path(const char *raw_path, const char *comp_dir, if (access(*new_path, R_OK) == 0) return 0; - if (!symbol_conf.source_prefix) + if (!symbol_conf.source_prefix) { /* In case of searching comp_dir, don't retry */ + zfree(new_path); return -errno; + } switch (errno) { case ENAMETOOLONG: @@ -1805,7 +1808,7 @@ static void print_open_warning(int err, bool is_kprobe) " - please rebuild kernel with %s.\n", is_kprobe ? 'k' : 'u', config); } else if (err == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else pr_warning("Failed to open %cprobe_events: %s\n", is_kprobe ? 'k' : 'u', @@ -1816,7 +1819,7 @@ static void print_both_open_warning(int kerr, int uerr) { /* Both kprobes and uprobes are disabled, warn it. */ if (kerr == -ENOTSUP && uerr == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else if (kerr == -ENOENT && uerr == -ENOENT) pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " "or/and CONFIG_UPROBE_EVENTS.\n"); @@ -1833,13 +1836,20 @@ static int open_probe_events(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; const char *__debugfs; + const char *tracing_dir = ""; int ret; - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; + __debugfs = tracefs_find_mountpoint(); + if (__debugfs == NULL) { + tracing_dir = "tracing/"; - ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file); + __debugfs = debugfs_find_mountpoint(); + if (__debugfs == NULL) + return -ENOTSUP; + } + + ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", + __debugfs, tracing_dir, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) @@ -1855,12 +1865,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("tracing/kprobe_events", readwrite); + return open_probe_events("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("tracing/uprobe_events", readwrite); + return open_probe_events("uprobe_events", readwrite); } /* Get raw string list of current kprobe_events or uprobe_events */ @@ -1895,6 +1905,95 @@ static struct strlist *get_probe_trace_command_rawlist(int fd) return sl; } +struct kprobe_blacklist_node { + struct list_head list; + unsigned long start; + unsigned long end; + char *symbol; +}; + +static void kprobe_blacklist__delete(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + + while (!list_empty(blacklist)) { + node = list_first_entry(blacklist, + struct kprobe_blacklist_node, list); + list_del(&node->list); + free(node->symbol); + free(node); + } +} + +static int kprobe_blacklist__load(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + const char *__debugfs = debugfs_find_mountpoint(); + char buf[PATH_MAX], *p; + FILE *fp; + int ret; + + if (__debugfs == NULL) + return -ENOTSUP; + + ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs); + if (ret < 0) + return ret; + + fp = fopen(buf, "r"); + if (!fp) + return -errno; + + ret = 0; + while (fgets(buf, PATH_MAX, fp)) { + node = zalloc(sizeof(*node)); + if (!node) { + ret = -ENOMEM; + break; + } + INIT_LIST_HEAD(&node->list); + list_add_tail(&node->list, blacklist); + if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) { + ret = -EINVAL; + break; + } + p = strchr(buf, '\t'); + if (p) { + p++; + if (p[strlen(p) - 1] == '\n') + p[strlen(p) - 1] = '\0'; + } else + p = (char *)"unknown"; + node->symbol = strdup(p); + if (!node->symbol) { + ret = -ENOMEM; + break; + } + pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n", + node->start, node->end, node->symbol); + ret++; + } + if (ret < 0) + kprobe_blacklist__delete(blacklist); + fclose(fp); + + return ret; +} + +static struct kprobe_blacklist_node * +kprobe_blacklist__find_by_address(struct list_head *blacklist, + unsigned long address) +{ + struct kprobe_blacklist_node *node; + + list_for_each_entry(node, blacklist, list) { + if (node->start <= address && address <= node->end) + return node; + } + + return NULL; +} + /* Show an event */ static int show_perf_probe_event(struct perf_probe_event *pev, const char *module) @@ -2109,6 +2208,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, char buf[64]; const char *event, *group; struct strlist *namelist; + LIST_HEAD(blacklist); + struct kprobe_blacklist_node *node; if (pev->uprobes) fd = open_uprobe_events(true); @@ -2126,11 +2227,25 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, pr_debug("Failed to get current event list.\n"); return -EIO; } + /* Get kprobe blacklist if exists */ + if (!pev->uprobes) { + ret = kprobe_blacklist__load(&blacklist); + if (ret < 0) + pr_debug("No kprobe blacklist support, ignored\n"); + } ret = 0; pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; + /* Ensure that the address is NOT blacklisted */ + node = kprobe_blacklist__find_by_address(&blacklist, + tev->point.address); + if (node) { + pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol); + continue; + } + if (pev->event) event = pev->event; else @@ -2181,13 +2296,15 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, allow_suffix = true; } - if (ret >= 0) { + /* Note that it is possible to skip all events because of blacklist */ + if (ret >= 0 && tev->event) { /* Show how to use the event. */ pr_info("\nYou can now use it in all perf tools, such as:\n\n"); pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } + kprobe_blacklist__delete(&blacklist); strlist__delete(namelist); close(fd); return ret; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b5247d777f0e..d14193518e4d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -915,17 +915,13 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; param->retval = find_probe_point_by_line(pf); - } else if (!dwarf_func_inline(sp_die)) { + } else if (die_is_func_instance(sp_die)) { + /* Instances always have the entry address */ + dwarf_entrypc(sp_die, &pf->addr); /* Real function */ if (pp->lazy_line) param->retval = find_probe_point_lazy(sp_die, pf); else { - if (dwarf_entrypc(sp_die, &pf->addr) != 0) { - pr_warning("Failed to get entry address of " - "%s.\n", dwarf_diename(sp_die)); - param->retval = -ENOENT; - return DWARF_CB_ABORT; - } pf->addr += pp->offset; /* TODO: Check the address in this function */ param->retval = call_probe_finder(sp_die, pf); @@ -1536,7 +1532,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); lr->start = lf->lno_s; lr->end = lf->lno_e; - if (dwarf_func_inline(sp_die)) + if (!die_is_func_instance(sp_die)) param->retval = die_walk_instances(sp_die, line_range_inline_cb, lf); else diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 6c6a6953fa93..4d28624a1eca 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -17,6 +17,5 @@ util/xyarray.c util/cgroup.c util/rblist.c util/strlist.c -../lib/api/fs/fs.c util/trace-event.c ../../lib/rbtree.c diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build new file mode 100644 index 000000000000..6516e220c247 --- /dev/null +++ b/tools/perf/util/scripting-engines/Build @@ -0,0 +1,6 @@ +libperf-$(CONFIG_LIBPERL) += trace-event-perl.o +libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o + +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default + +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0baf75f12b7c..e4f166981ff0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -537,7 +537,7 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", oe->last_flush_type); - s->stats.nr_unordered_events++; + s->evlist->stats.nr_unordered_events++; } new = ordered_events__new(oe, timestamp, event); @@ -553,15 +553,67 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, return 0; } -static void callchain__printf(struct perf_sample *sample) +static void callchain__lbr_callstack_printf(struct perf_sample *sample) { + struct ip_callchain *callchain = sample->callchain; + struct branch_stack *lbr_stack = sample->branch_stack; + u64 kernel_callchain_nr = callchain->nr; unsigned int i; - printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr); + for (i = 0; i < kernel_callchain_nr; i++) { + if (callchain->ips[i] == PERF_CONTEXT_USER) + break; + } + + if ((i != kernel_callchain_nr) && lbr_stack->nr) { + u64 total_nr; + /* + * LBR callstack can only get user call chain, + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER. + * + * The user call chain is stored in LBR registers. + * LBR are pair registers. The caller is stored + * in "from" register, while the callee is stored + * in "to" register. + * For example, there is a call stack + * "A"->"B"->"C"->"D". + * The LBR registers will recorde like + * "C"->"D", "B"->"C", "A"->"B". + * So only the first "to" register and all "from" + * registers are needed to construct the whole stack. + */ + total_nr = i + 1 + lbr_stack->nr + 1; + kernel_callchain_nr = i + 1; + + printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr); + + for (i = 0; i < kernel_callchain_nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + i, callchain->ips[i]); + + printf("..... %2d: %016" PRIx64 "\n", + (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + for (i = 0; i < lbr_stack->nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + } +} + +static void callchain__printf(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + unsigned int i; + struct ip_callchain *callchain = sample->callchain; + + if (has_branch_callstack(evsel)) + callchain__lbr_callstack_printf(sample); - for (i = 0; i < sample->callchain->nr; i++) + printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); + + for (i = 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - i, sample->callchain->ips[i]); + i, callchain->ips[i]); } static void branch_stack__printf(struct perf_sample *sample) @@ -636,14 +688,14 @@ static void stack_user__printf(struct stack_dump *dump) dump->size, dump->offset); } -static void perf_session__print_tstamp(struct perf_session *session, +static void perf_evlist__print_tstamp(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = __perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = __perf_evlist__combined_sample_type(evlist); if (event->header.type != PERF_RECORD_SAMPLE && - !perf_evlist__sample_id_all(session->evlist)) { + !perf_evlist__sample_id_all(evlist)) { fputs("-1 -1 ", stdout); return; } @@ -685,7 +737,7 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format) sample->read.one.id, sample->read.one.value); } -static void dump_event(struct perf_session *session, union perf_event *event, +static void dump_event(struct perf_evlist *evlist, union perf_event *event, u64 file_offset, struct perf_sample *sample) { if (!dump_trace) @@ -697,7 +749,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, trace_event(event); if (sample) - perf_session__print_tstamp(session, event, sample); + perf_evlist__print_tstamp(evlist, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, event->header.size, perf_event__name(event->header.type)); @@ -718,9 +770,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) - callchain__printf(sample); + callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) + if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel)) branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) @@ -745,8 +797,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } -static struct machine * - perf_session__find_machine_for_cpumode(struct perf_session *session, +static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { @@ -764,26 +815,24 @@ static struct machine * else pid = sample->pid; - machine = perf_session__find_machine(session, pid); + machine = machines__find(machines, pid); if (!machine) - machine = perf_session__findnew_machine(session, - DEFAULT_GUEST_KERNEL_ID); + machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); return machine; } - return &session->machines.host; + return &machines->host; } -static int deliver_sample_value(struct perf_session *session, +static int deliver_sample_value(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, struct machine *machine) { - struct perf_sample_id *sid; + struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); - sid = perf_evlist__id2sid(session->evlist, v->id); if (sid) { sample->id = v->id; sample->period = v->value - sid->period; @@ -791,14 +840,14 @@ static int deliver_sample_value(struct perf_session *session, } if (!sid || sid->evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } return tool->sample(tool, event, sample, sid->evsel, machine); } -static int deliver_sample_group(struct perf_session *session, +static int deliver_sample_group(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -808,7 +857,7 @@ static int deliver_sample_group(struct perf_session *session, u64 i; for (i = 0; i < sample->read.group.nr; i++) { - ret = deliver_sample_value(session, tool, event, sample, + ret = deliver_sample_value(evlist, tool, event, sample, &sample->read.group.values[i], machine); if (ret) @@ -819,7 +868,7 @@ static int deliver_sample_group(struct perf_session *session, } static int -perf_session__deliver_sample(struct perf_session *session, + perf_evlist__deliver_sample(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -836,10 +885,10 @@ perf_session__deliver_sample(struct perf_session *session, /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) - return deliver_sample_group(session, tool, event, sample, + return deliver_sample_group(evlist, tool, event, sample, machine); else - return deliver_sample_value(session, tool, event, sample, + return deliver_sample_value(evlist, tool, event, sample, &sample->read.one, machine); } @@ -848,29 +897,28 @@ int perf_session__deliver_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_evlist *evlist = session->evlist; struct perf_evsel *evsel; struct machine *machine; - dump_event(session, event, file_offset, sample); + dump_event(evlist, event, file_offset, sample); - evsel = perf_evlist__id2evsel(session->evlist, sample->id); + evsel = perf_evlist__id2evsel(evlist, sample->id); - machine = perf_session__find_machine_for_cpumode(session, event, - sample); + machine = machines__find_for_cpumode(&session->machines, event, sample); switch (event->header.type) { case PERF_RECORD_SAMPLE: dump_sample(evsel, event, sample); if (evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } if (machine == NULL) { - ++session->stats.nr_unprocessable_samples; + ++evlist->stats.nr_unprocessable_samples; return 0; } - return perf_session__deliver_sample(session, tool, event, - sample, evsel, machine); + return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: @@ -883,7 +931,7 @@ int perf_session__deliver_event(struct perf_session *session, return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: if (tool->lost == perf_event__process_lost) - session->stats.total_lost += event->lost.lost; + evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_READ: return tool->read(tool, event, sample, evsel, machine); @@ -892,7 +940,7 @@ int perf_session__deliver_event(struct perf_session *session, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); default: - ++session->stats.nr_unknown_events; + ++evlist->stats.nr_unknown_events; return -1; } } @@ -905,7 +953,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, int fd = perf_data_file__fd(session->file); int err; - dump_event(session, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, NULL); /* These events are processed right away */ switch (event->header.type) { @@ -942,7 +990,7 @@ int perf_session__deliver_synth_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - events_stats__inc(&session->stats, event->header.type); + events_stats__inc(&session->evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, tool, 0); @@ -1019,16 +1067,17 @@ static s64 perf_session__process_event(struct perf_session *session, struct perf_tool *tool, u64 file_offset) { + struct perf_evlist *evlist = session->evlist; struct perf_sample sample; int ret; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(session->evlist)); + event_swap(event, perf_evlist__sample_id_all(evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; - events_stats__inc(&session->stats, event->header.type); + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, tool, file_offset); @@ -1036,7 +1085,7 @@ static s64 perf_session__process_event(struct perf_session *session, /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample); + ret = perf_evlist__parse_sample(evlist, event, &sample); if (ret) return ret; @@ -1076,47 +1125,47 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se return thread; } -static void perf_session__warn_about_errors(const struct perf_session *session, - const struct perf_tool *tool) +static void perf_tool__warn_about_errors(const struct perf_tool *tool, + const struct events_stats *stats) { if (tool->lost == perf_event__process_lost && - session->stats.nr_events[PERF_RECORD_LOST] != 0) { + stats->nr_events[PERF_RECORD_LOST] != 0) { ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n", - session->stats.nr_events[0], - session->stats.nr_events[PERF_RECORD_LOST]); + stats->nr_events[0], + stats->nr_events[PERF_RECORD_LOST]); } - if (session->stats.nr_unknown_events != 0) { + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_unknown_events); + stats->nr_unknown_events); } - if (session->stats.nr_unknown_id != 0) { + if (stats->nr_unknown_id != 0) { ui__warning("%u samples with id not present in the header\n", - session->stats.nr_unknown_id); + stats->nr_unknown_id); } - if (session->stats.nr_invalid_chains != 0) { - ui__warning("Found invalid callchains!\n\n" - "%u out of %u events were discarded for this reason.\n\n" - "Consider reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_invalid_chains, - session->stats.nr_events[PERF_RECORD_SAMPLE]); - } + if (stats->nr_invalid_chains != 0) { + ui__warning("Found invalid callchains!\n\n" + "%u out of %u events were discarded for this reason.\n\n" + "Consider reporting to linux-kernel@vger.kernel.org.\n\n", + stats->nr_invalid_chains, + stats->nr_events[PERF_RECORD_SAMPLE]); + } - if (session->stats.nr_unprocessable_samples != 0) { + if (stats->nr_unprocessable_samples != 0) { ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n", - session->stats.nr_unprocessable_samples); + stats->nr_unprocessable_samples); } - if (session->stats.nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); + if (stats->nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", stats->nr_unordered_events); } volatile int session_done; @@ -1206,7 +1255,7 @@ done: err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); out_err: free(buf); - perf_session__warn_about_errors(session, tool); + perf_tool__warn_about_errors(tool, &session->evlist->stats); ordered_events__free(&session->ordered_events); return err; } @@ -1351,7 +1400,7 @@ out: err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session, tool); + perf_tool__warn_about_errors(tool, &session->evlist->stats); ordered_events__free(&session->ordered_events); session->one_mmap = false; return err; @@ -1436,7 +1485,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) { size_t ret = fprintf(fp, "Aggregated stats:\n"); - ret += events_stats__fprintf(&session->stats, fp); + ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 6d663dc76404..fe859f379ca7 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,7 +20,6 @@ struct perf_session { struct machines machines; struct perf_evlist *evlist; struct trace_event tevent; - struct events_stats stats; bool repipe; bool one_mmap; void *one_mmap_addr; diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index d0aee4b9dfd4..1833103768cb 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,7 +25,7 @@ cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') -libapikfs = getenv('LIBAPIKFS') +libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7a39c1ed8d37..4593f36ecc4c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1463,6 +1463,15 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } else if (sd->entry == &sort_sym) { sort__has_sym = 1; + /* + * perf diff displays the performance difference amongst + * two or more perf.data files. Those files could come + * from different binaries. So we should not compare + * their ips, but the name of symbol. + */ + if (sort__mode == SORT_MODE__DIFF) + sd->entry->se_collapse = sort__sym_sort; + } else if (sd->entry == &sort_dso) { sort__has_dso = 1; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 33b7a2aef713..ada16762fac2 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -74,6 +74,10 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +#ifndef STT_GNU_IFUNC +#define STT_GNU_IFUNC 10 +#endif + static inline int elf_sym__is_function(const GElf_Sym *sym) { return (elf_sym__type(sym) == STT_FUNC || @@ -864,10 +868,9 @@ int dso__load_sym(struct dso *dso, struct map *map, /* Reject ARM ELF "mapping symbols": these aren't unique and * don't identify functions, so will confuse the profile * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) + if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) { + if (elf_name[0] == '$' && strchr("adtx", elf_name[1]) + && (elf_name[2] == '\0' || elf_name[2] == '.')) continue; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c36636fd825b..25d6c737be3e 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -112,8 +112,8 @@ unsigned long long read_size(struct event_format *event, void *ptr, int size) return pevent_read_number(event->pevent, ptr, size); } -void event_format__print(struct event_format *event, - int cpu, void *data, int size) +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp) { struct pevent_record record; struct trace_seq s; @@ -125,10 +125,16 @@ void event_format__print(struct event_format *event, trace_seq_init(&s); pevent_event_info(&s, event, &record); - trace_seq_do_printf(&s); + trace_seq_do_fprintf(&s, fp); trace_seq_destroy(&s); } +void event_format__print(struct event_format *event, + int cpu, void *data, int size) +{ + return event_format__fprintf(event, cpu, data, size, stdout); +} + void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 52aaa19e1eb1..356629a30ca9 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -23,6 +23,9 @@ trace_event__tp_format(const char *sys, const char *name); int bigendian(void); +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp); + void event_format__print(struct event_format *event, int cpu, void *data, int size); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b86744f29eef..4ee6d0d4c993 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -269,6 +269,13 @@ void dump_stack(void) void dump_stack(void) {} #endif +void sighandler_dump_stack(int sig) +{ + psignal(sig, "perf"); + dump_stack(); + exit(sig); +} + void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -303,13 +310,26 @@ void set_term_quiet_input(struct termios *old) tcsetattr(0, TCSANOW, &tc); } -static void set_tracing_events_path(const char *mountpoint) +static void set_tracing_events_path(const char *tracing, const char *mountpoint) { - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", - mountpoint, "tracing/events"); + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", + mountpoint, tracing, "events"); } -const char *perf_debugfs_mount(const char *mountpoint) +static const char *__perf_tracefs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = tracefs_mount(mountpoint); + if (!mnt) + return NULL; + + set_tracing_events_path("", mnt); + + return mnt; +} + +static const char *__perf_debugfs_mount(const char *mountpoint) { const char *mnt; @@ -317,7 +337,20 @@ const char *perf_debugfs_mount(const char *mountpoint) if (!mnt) return NULL; - set_tracing_events_path(mnt); + set_tracing_events_path("tracing/", mnt); + + return mnt; +} + +const char *perf_debugfs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = __perf_tracefs_mount(mountpoint); + if (mnt) + return mnt; + + mnt = __perf_debugfs_mount(mountpoint); return mnt; } @@ -325,12 +358,19 @@ const char *perf_debugfs_mount(const char *mountpoint) void perf_debugfs_set_path(const char *mntpt) { snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); - set_tracing_events_path(mntpt); + set_tracing_events_path("tracing/", mntpt); +} + +static const char *find_tracefs(void) +{ + const char *path = __perf_tracefs_mount(NULL); + + return path; } static const char *find_debugfs(void) { - const char *path = perf_debugfs_mount(NULL); + const char *path = __perf_debugfs_mount(NULL); if (!path) fprintf(stderr, "Your kernel does not support the debugfs filesystem"); @@ -344,6 +384,7 @@ static const char *find_debugfs(void) */ const char *find_tracing_dir(void) { + const char *tracing_dir = ""; static char *tracing; static int tracing_found; const char *debugfs; @@ -351,11 +392,15 @@ const char *find_tracing_dir(void) if (tracing_found) return tracing; - debugfs = find_debugfs(); - if (!debugfs) - return NULL; + debugfs = find_tracefs(); + if (!debugfs) { + tracing_dir = "/tracing"; + debugfs = find_debugfs(); + if (!debugfs) + return NULL; + } - if (asprintf(&tracing, "%s/tracing", debugfs) < 0) + if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) return NULL; tracing_found = 1; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 027a5153495c..fbd598afc606 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -75,6 +75,7 @@ #include <linux/types.h> #include <sys/ttydefaults.h> #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include <termios.h> #include <linux/bitops.h> #include <termios.h> @@ -276,6 +277,7 @@ char *ltrim(char *s); char *rtrim(char *s); void dump_stack(void); +void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index f4b953354ff7..eec688041500 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 2f0f34a36db4..5da129e10aa2 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslibcfs.c b/tools/power/acpi/os_specific/service_layers/oslibcfs.c index c13ff9c51d74..b51e40a9a120 100644 --- a/tools/power/acpi/os_specific/service_layers/oslibcfs.c +++ b/tools/power/acpi/os_specific/service_layers/oslibcfs.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 0dc2485dedf5..92f1fd700344 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index 733f9e490fc4..e153fcb12b1a 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 99b47b6194a3..3853a7350440 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 7ccb073f8316..6858c0893c91 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index a2d37d610639..84bdef0136cb 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 24d32968802d..c736adf5fb55 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index d470046a6d81..8f2fe168228e 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 853b4da22c3e..d0ba6535f5af 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 2e2ba2efa0d9..3ed7c0476d48 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -209,7 +209,7 @@ $(OUTPUT)%.o: %.c $(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) $(ECHO) " CC " $@ - $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L$(OUTPUT) -o $@ + $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -Wl,-rpath=./ -lrt -lpci -L$(OUTPUT) -o $@ $(QUIET) $(STRIPCMD) $@ $(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 56bfb523c5bb..feea7ad9500b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -9,40 +9,50 @@ turbostat \- Report processor frequency and idle statistics .br .B turbostat .RB [ Options ] -.RB [ "\-i interval_sec" ] +.RB [ "\--interval seconds" ] .SH DESCRIPTION \fBturbostat \fP reports processor topology, frequency, -idle power-state statistics, temperature and power on modern X86 processors. -Either \fBcommand\fP is forked and statistics are printed -upon its completion, or statistics are printed periodically. - -\fBturbostat \fP -must be run on root, and -minimally requires that the processor -supports an "invariant" TSC, plus the APERF and MPERF MSRs. -Additional information is reported depending on hardware counter support. - +idle power-state statistics, temperature and power on X86 processors. +There are two ways to invoke turbostat. +The first method is to supply a +\fBcommand\fP, which is forked and statistics are printed +upon its completion. +The second method is to omit the command, +and turbostat displays statistics every 5 seconds. +The 5-second interval can be changed using the --interval option. + +Some information is not available on older processors. .SS Options -The \fB-p\fP option limits output to the 1st thread in 1st core of each package. +\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. +.PP +\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. +.PP +\fB--Dump\fP displays the raw counter values. +.PP +\fB--debug\fP displays additional system configuration information. Invoking this parameter +more than once may also enable internal turbostat debug information. +.PP +\fB--interval seconds\fP overrides the default 5-second measurement interval. +.PP +\fB--help\fP displays usage for the most common parameters. .PP -The \fB-P\fP option limits output to the 1st thread in each Package. +\fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. .PP -The \fB-S\fP option limits output to a 1-line System Summary for each interval. +\fB--MSR MSR#\fP shows the specified 64-bit MSR value. .PP -The \fB-v\fP option increases verbosity. +\fB--msr MSR#\fP shows the specified 32-bit MSR value. .PP -The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. +\fB--Package\fP limits output to the system summary plus the 1st thread in each Package. .PP -The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. +\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. .PP -The \fB-m MSR#\fP option includes the the specified 32-bit MSR value. +\fB--Summary\fP limits output to a 1-line System Summary for each interval. .PP -The \fB-M MSR#\fP option includes the the specified 64-bit MSR value. +\fB--TCC temperature\fP sets the Thermal Control Circuit temperature for systems which do not export that value. This is used for making sense of the Digital Thermal Sensor outputs, as they return degrees Celsius below the TCC activation temperature. .PP -The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds. -The default is 5 seconds. +\fB--version\fP displays the version. .PP -The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit, +The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, displays the statistics gathered since it was forked. .PP .SH FIELD DESCRIPTIONS @@ -52,7 +62,7 @@ displays the statistics gathered since it was forked. \fBCPU\fP Linux CPU (logical processor) number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. \fBAVG_MHz\fP number of cycles executed divided by time elapsed. -\fB%Buzy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. +\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. @@ -68,7 +78,7 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T .fi .PP .SH EXAMPLE -Without any parameters, turbostat prints out counters ever 5 seconds. +Without any parameters, turbostat displays statistics ever 5 seconds. (override interval with "-i sec" option, or specify a command for turbostat to fork). @@ -91,19 +101,19 @@ Subsequent rows show per-CPU statistics. 3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23 3 7 5 0.31 1596 3492 0 0.33 .fi -.SH VERBOSE EXAMPLE -The "-v" option adds verbosity to the output: +.SH DEBUG EXAMPLE +The "--debug" option prints additional system information before measurements: .nf -[root@ivy]# turbostat -v -turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org> +turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org> CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) CPUID(6): APERF, DTS, PTM, EPB -RAPL: 851 sec. Joule Counter Range +RAPL: 851 sec. Joule Counter Range, at 77 Watts cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 16 * 100 = 1600 MHz max efficiency 35 * 100 = 3500 MHz TSC frequency -cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) +cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled) +cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n) cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 37 * 100 = 3700 MHz max turbo 4 active cores 38 * 100 = 3800 MHz max turbo 3 active cores @@ -112,9 +122,9 @@ cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) -cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) +cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked) cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) -cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) +cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled) cpu0: MSR_PP0_POLICY: 0 cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) @@ -123,19 +133,20 @@ cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) -cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) -cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) -cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) +cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1) cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) ... .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency -available at the minimum package voltage. The \fBTSC frequency\fP is the nominal -maximum frequency of the processor if turbo-mode were not available. This frequency +available at the minimum package voltage. The \fBTSC frequency\fP is the base +frequency of the processor -- this should match the brand string +in /proc/cpuinfo. This base frequency should be sustainable on all CPUs indefinitely, given nominal power and cooling. The remaining rows show what maximum turbo frequency is possible -depending on the number of idle cores. Note that this information is -not available on all processors. +depending on the number of idle cores. Note that not all information is +available on all processors. .SH FORK EXAMPLE If turbostat is invoked with a command, it will fork that command and output the statistics gathered when the command exits. @@ -176,6 +187,11 @@ not including any non-busy idle time. .B "turbostat " must be run as root. +Alternatively, non-root users can be enabled to run turbostat this way: + +# setcap cap_sys_rawio=ep ./turbostat + +# chmod +r /dev/cpu/*/msr .B "turbostat " reads hardware counters, but doesn't write them. @@ -184,15 +200,33 @@ multiple invocations of itself. \fBturbostat \fP may work poorly on Linux-2.6.20 through 2.6.29, -as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF +as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF MSRs in those kernels. -If the TSC column does not make sense, then -the other numbers will also make no sense. -Turbostat is lightweight, and its data collection is not atomic. -These issues are usually caused by an extremely short measurement -interval (much less than 1 second), or system activity that prevents -turbostat from being able to run on all CPUS to quickly collect data. +AVG_MHz = APERF_delta/measurement_interval. This is the actual +number of elapsed cycles divided by the entire sample interval -- +including idle time. Note that this calculation is resilient +to systems lacking a non-stop TSC. + +TSC_MHz = TSC_delta/measurement_interval. +On a system with an invariant TSC, this value will be constant +and will closely match the base frequency value shown +in the brand string in /proc/cpuinfo. On a system where +the TSC stops in idle, TSC_MHz will drop +below the processor's base frequency. + +%Busy = MPERF_delta/TSC_delta + +Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval + +Note that these calculations depend on TSC_delta, so they +are not reliable during intervals when TSC_MHz is not running at the base frequency. + +Turbostat data collection is not atomic. +Extremely short measurement intervals (much less than 1 second), +or system activity that prevents turbostat from being able +to run on all CPUS to quickly collect data, will result in +inconsistent results. The APERF, MPERF MSRs are defined to count non-halted cycles. Although it is not guaranteed by the architecture, turbostat assumes diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5b1b807265a1..2d089cac8580 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -33,24 +33,29 @@ #include <signal.h> #include <sys/time.h> #include <stdlib.h> +#include <getopt.h> #include <dirent.h> #include <string.h> #include <ctype.h> #include <sched.h> #include <cpuid.h> +#include <linux/capability.h> +#include <errno.h> char *proc_stat = "/proc/stat"; -unsigned int interval_sec = 5; /* set with -i interval_sec */ -unsigned int verbose; /* set with -v */ -unsigned int rapl_verbose; /* set with -R */ -unsigned int rapl_joules; /* set with -J */ -unsigned int thermal_verbose; /* set with -T */ -unsigned int summary_only; /* set with -S */ -unsigned int dump_only; /* set with -s */ +unsigned int interval_sec = 5; +unsigned int debug; +unsigned int rapl_joules; +unsigned int summary_only; +unsigned int dump_only; unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; +unsigned int do_pc2; +unsigned int do_pc3; +unsigned int do_pc6; +unsigned int do_pc7; unsigned int do_c8_c9_c10; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; @@ -59,8 +64,8 @@ unsigned int has_epb; unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; -unsigned int do_nehalem_platform_info; -unsigned int do_nehalem_turbo_ratio_limit; +unsigned int do_nhm_platform_info; +unsigned int do_nhm_turbo_ratio_limit; unsigned int do_ivt_turbo_ratio_limit; unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; @@ -81,6 +86,9 @@ unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; double rapl_power_units, rapl_energy_units, rapl_time_units; double rapl_joule_counter_range; +unsigned int do_core_perf_limit_reasons; +unsigned int do_gfx_perf_limit_reasons; +unsigned int do_ring_perf_limit_reasons; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -251,15 +259,13 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) - return -1; + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); retval = pread(fd, msr, sizeof *msr, offset); close(fd); - if (retval != sizeof *msr) { - fprintf(stderr, "%s offset 0x%llx read failed\n", pathname, (unsigned long long)offset); - return -1; - } + if (retval != sizeof *msr) + err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); return 0; } @@ -281,7 +287,7 @@ void print_header(void) outp += sprintf(outp, " CPU"); if (has_aperf) outp += sprintf(outp, " Avg_MHz"); - if (do_nhm_cstates) + if (has_aperf) outp += sprintf(outp, " %%Busy"); if (has_aperf) outp += sprintf(outp, " Bzy_MHz"); @@ -310,13 +316,13 @@ void print_header(void) if (do_ptm) outp += sprintf(outp, " PkgTmp"); - if (do_snb_cstates) + if (do_pc2) outp += sprintf(outp, " Pkg%%pc2"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc3) outp += sprintf(outp, " Pkg%%pc3"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc6) outp += sprintf(outp, " Pkg%%pc6"); - if (do_snb_cstates) + if (do_pc7) outp += sprintf(outp, " Pkg%%pc7"); if (do_c8_c9_c10) { outp += sprintf(outp, " Pkg%%pc8"); @@ -337,7 +343,7 @@ void print_header(void) outp += sprintf(outp, " PKG_%%"); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, " RAM_%%"); - } else { + } else if (do_rapl && rapl_joules) { if (do_rapl & RAPL_PKG) outp += sprintf(outp, " Pkg_J"); if (do_rapl & RAPL_CORES) @@ -391,9 +397,12 @@ int dump_counters(struct thread_data *t, struct core_data *c, if (p) { outp += sprintf(outp, "package: %d\n", p->package_id); outp += sprintf(outp, "pc2: %016llX\n", p->pc2); - outp += sprintf(outp, "pc3: %016llX\n", p->pc3); - outp += sprintf(outp, "pc6: %016llX\n", p->pc6); - outp += sprintf(outp, "pc7: %016llX\n", p->pc7); + if (do_pc3) + outp += sprintf(outp, "pc3: %016llX\n", p->pc3); + if (do_pc6) + outp += sprintf(outp, "pc6: %016llX\n", p->pc6); + if (do_pc7) + outp += sprintf(outp, "pc7: %016llX\n", p->pc7); outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); outp += sprintf(outp, "pc10: %016llX\n", p->pc10); @@ -457,25 +466,25 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%8d", t->cpu_id); } - /* AvgMHz */ + /* Avg_MHz */ if (has_aperf) outp += sprintf(outp, "%8.0f", 1.0 / units * t->aperf / interval_float); - /* %c0 */ - if (do_nhm_cstates) { + /* %Busy */ + if (has_aperf) { if (!skip_c0) outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); else outp += sprintf(outp, "********"); } - /* BzyMHz */ + /* Bzy_MHz */ if (has_aperf) outp += sprintf(outp, "%8.0f", 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); - /* TSC */ + /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); /* SMI */ @@ -525,13 +534,13 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_ptm) outp += sprintf(outp, "%8d", p->pkg_temp_c); - if (do_snb_cstates) + if (do_pc2) outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc3) outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc6) outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc); - if (do_snb_cstates) + if (do_pc7) outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc); if (do_c8_c9_c10) { outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc); @@ -561,7 +570,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } else { + } else if (do_rapl && rapl_joules) { if (do_rapl & RAPL_PKG) outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units); @@ -578,8 +587,8 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - outp += sprintf(outp, fmt8, interval_float); + outp += sprintf(outp, fmt8, interval_float); } done: outp += sprintf(outp, "\n"); @@ -628,9 +637,12 @@ void delta_package(struct pkg_data *new, struct pkg_data *old) { old->pc2 = new->pc2 - old->pc2; - old->pc3 = new->pc3 - old->pc3; - old->pc6 = new->pc6 - old->pc6; - old->pc7 = new->pc7 - old->pc7; + if (do_pc3) + old->pc3 = new->pc3 - old->pc3; + if (do_pc6) + old->pc6 = new->pc6 - old->pc6; + if (do_pc7) + old->pc7 = new->pc7 - old->pc7; old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; @@ -670,24 +682,26 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = new->c1 - old->c1; - if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { - old->aperf = new->aperf - old->aperf; - old->mperf = new->mperf - old->mperf; - } else { + if (has_aperf) { + if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { + old->aperf = new->aperf - old->aperf; + old->mperf = new->mperf - old->mperf; + } else { - if (!aperf_mperf_unstable) { - fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); - fprintf(stderr, "* Frequency results do not cover entire interval *\n"); - fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + if (!aperf_mperf_unstable) { + fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); + fprintf(stderr, "* Frequency results do not cover entire interval *\n"); + fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); - aperf_mperf_unstable = 1; + aperf_mperf_unstable = 1; + } + /* + * mperf delta is likely a huge "positive" number + * can not use it for calculating c0 time + */ + skip_c0 = 1; + skip_c1 = 1; } - /* - * mperf delta is likely a huge "positive" number - * can not use it for calculating c0 time - */ - skip_c0 = 1; - skip_c1 = 1; } @@ -712,7 +726,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, } if (old->mperf == 0) { - if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); + if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); old->mperf = 1; /* divide by 0 protection */ } @@ -769,9 +783,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->core_temp_c = 0; p->pc2 = 0; - p->pc3 = 0; - p->pc6 = 0; - p->pc7 = 0; + if (do_pc3) + p->pc3 = 0; + if (do_pc6) + p->pc6 = 0; + if (do_pc7) + p->pc7 = 0; p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; @@ -810,9 +827,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, return 0; average.packages.pc2 += p->pc2; - average.packages.pc3 += p->pc3; - average.packages.pc6 += p->pc6; - average.packages.pc7 += p->pc7; + if (do_pc3) + average.packages.pc3 += p->pc3; + if (do_pc6) + average.packages.pc6 += p->pc6; + if (do_pc7) + average.packages.pc7 += p->pc7; average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; @@ -854,9 +874,12 @@ void compute_average(struct thread_data *t, struct core_data *c, average.cores.c7 /= topo.num_cores; average.packages.pc2 /= topo.num_packages; - average.packages.pc3 /= topo.num_packages; - average.packages.pc6 /= topo.num_packages; - average.packages.pc7 /= topo.num_packages; + if (do_pc3) + average.packages.pc3 /= topo.num_packages; + if (do_pc6) + average.packages.pc6 /= topo.num_packages; + if (do_pc7) + average.packages.pc7 /= topo.num_packages; average.packages.pc8 /= topo.num_packages; average.packages.pc9 /= topo.num_packages; @@ -956,18 +979,18 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - if (do_nhm_cstates && !do_slm_cstates) { + if (do_pc3) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; + if (do_pc6) if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) return -10; - } - if (do_snb_cstates) { + if (do_pc2) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; + if (do_pc7) if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) return -12; - } if (do_c8_c9_c10) { if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) return -13; @@ -1014,12 +1037,43 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } +/* + * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: + * If you change the values, note they are used both in comparisons + * (>= PCL__7) and to index pkg_cstate_limit_strings[]. + */ + +#define PCLUKN 0 /* Unknown */ +#define PCLRSV 1 /* Reserved */ +#define PCL__0 2 /* PC0 */ +#define PCL__1 3 /* PC1 */ +#define PCL__2 4 /* PC2 */ +#define PCL__3 5 /* PC3 */ +#define PCL__4 6 /* PC4 */ +#define PCL__6 7 /* PC6 */ +#define PCL_6N 8 /* PC6 No Retention */ +#define PCL_6R 9 /* PC6 Retention */ +#define PCL__7 10 /* PC7 */ +#define PCL_7S 11 /* PC7 Shrink */ +#define PCLUNL 12 /* Unlimited */ + +int pkg_cstate_limit = PCLUKN; +char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", + "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "unlimited"}; + +int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL}; +int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL}; +int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL}; +int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7}; +int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; +int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL}; + void print_verbose_header(void) { unsigned long long msr; unsigned int ratio; - if (!do_nehalem_platform_info) + if (!do_nhm_platform_info) return; get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); @@ -1093,46 +1147,16 @@ print_nhm_turbo_ratio_limits: fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); - fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", + fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", (msr & (1 << 15)) ? "" : "UN", - (unsigned int)msr & 7); - - - switch(msr & 0x7) { - case 0: - fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0"); - break; - case 1: - fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0"); - break; - case 2: - fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3"); - break; - case 3: - fprintf(stderr, do_slm_cstates ? "invalid" : "pc6"); - break; - case 4: - fprintf(stderr, do_slm_cstates ? "pc4" : "pc7"); - break; - case 5: - fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid"); - break; - case 6: - fprintf(stderr, do_slm_cstates ? "pc6" : "invalid"); - break; - case 7: - fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited"); - break; - default: - fprintf(stderr, "invalid"); - } - fprintf(stderr, ")\n"); + (unsigned int)msr & 7, + pkg_cstate_limit_strings[pkg_cstate_limit]); - if (!do_nehalem_turbo_ratio_limit) + if (!do_nhm_turbo_ratio_limit) return; get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); @@ -1178,6 +1202,7 @@ print_nhm_turbo_ratio_limits: if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); + } void free_all_buffers(void) @@ -1458,18 +1483,66 @@ void check_dev_msr() struct stat sb; if (stat("/dev/cpu/0/msr", &sb)) - err(-5, "no /dev/cpu/0/msr\n" - "Try \"# modprobe msr\""); + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } -void check_super_user() +void check_permissions() { - if (getuid() != 0) - errx(-6, "must be root"); + struct __user_cap_header_struct cap_header_data; + cap_user_header_t cap_header = &cap_header_data; + struct __user_cap_data_struct cap_data_data; + cap_user_data_t cap_data = &cap_data_data; + extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); + int do_exit = 0; + + /* check for CAP_SYS_RAWIO */ + cap_header->pid = getpid(); + cap_header->version = _LINUX_CAPABILITY_VERSION; + if (capget(cap_header, cap_data) < 0) + err(-6, "capget(2) failed"); + + if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) { + do_exit++; + warnx("capget(CAP_SYS_RAWIO) failed," + " try \"# setcap cap_sys_rawio=ep %s\"", progname); + } + + /* test file permissions */ + if (euidaccess("/dev/cpu/0/msr", R_OK)) { + do_exit++; + warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); + } + + /* if all else fails, thell them to be root */ + if (do_exit) + if (getuid() != 0) + warnx("... or simply run as root"); + + if (do_exit) + exit(-6); } -int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) +/* + * NHM adds support for additional MSRs: + * + * MSR_SMI_COUNT 0x00000034 + * + * MSR_NHM_PLATFORM_INFO 0x000000ce + * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 + * + * MSR_PKG_C3_RESIDENCY 0x000003f8 + * MSR_PKG_C6_RESIDENCY 0x000003f9 + * MSR_CORE_C3_RESIDENCY 0x000003fc + * MSR_CORE_C6_RESIDENCY 0x000003fd + * + * Side effect: + * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL + */ +int probe_nhm_msrs(unsigned int family, unsigned int model) { + unsigned long long msr; + int *pkg_cstate_limits; + if (!genuine_intel) return 0; @@ -1482,24 +1555,54 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x1F: /* Core i7 and i5 Processor - Nehalem */ case 0x25: /* Westmere Client - Clarkdale, Arrandale */ case 0x2C: /* Westmere EP - Gulftown */ + case 0x2E: /* Nehalem-EX Xeon - Beckton */ + case 0x2F: /* Westmere-EX Xeon - Eagleton */ + pkg_cstate_limits = nhm_pkg_cstate_limits; + break; case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ case 0x3A: /* IVB */ case 0x3E: /* IVB Xeon */ + pkg_cstate_limits = snb_pkg_cstate_limits; + break; case 0x3C: /* HSW */ case 0x3F: /* HSX */ case 0x45: /* HSW */ case 0x46: /* HSW */ - case 0x37: /* BYT */ - case 0x4D: /* AVN */ case 0x3D: /* BDW */ + case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ - return 1; + pkg_cstate_limits = hsw_pkg_cstate_limits; + break; + case 0x37: /* BYT */ + case 0x4D: /* AVN */ + pkg_cstate_limits = slv_pkg_cstate_limits; + break; + case 0x4C: /* AMT */ + pkg_cstate_limits = amt_pkg_cstate_limits; + break; + case 0x57: /* PHI */ + pkg_cstate_limits = phi_pkg_cstate_limits; + break; + default: + return 0; + } + get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + + pkg_cstate_limit = pkg_cstate_limits[msr & 0x7]; + + return 1; +} +int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + switch (model) { + /* Nehalem compatible, but do not include turbo-ratio limit support */ case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ - default: return 0; + default: + return 1; } } int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) @@ -1564,6 +1667,103 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } +/* + * print_perf_limit() + */ +int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + + cpu = t->cpu_id; + + /* per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_core_perf_limit_reasons) { + get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); + fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", + (msr & 1 << 0) ? "PROCHOT, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 2) ? "bit2, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 5) ? "Auto-HWP, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 9) ? "CorePwr, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : "", + (msr & 1 << 12) ? "MultiCoreTurbo, " : "", + (msr & 1 << 13) ? "Transitions, " : "", + (msr & 1 << 14) ? "bit14, " : "", + (msr & 1 << 15) ? "bit15, " : ""); + fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + (msr & 1 << 16) ? "PROCHOT, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 18) ? "bit18, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 21) ? "Auto-HWP, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 25) ? "CorePwr, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : "", + (msr & 1 << 28) ? "MultiCoreTurbo, " : "", + (msr & 1 << 29) ? "Transitions, " : "", + (msr & 1 << 30) ? "bit30, " : "", + (msr & 1 << 31) ? "bit31, " : ""); + + } + if (do_gfx_perf_limit_reasons) { + get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); + fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", + (msr & 1 << 0) ? "PROCHOT, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 9) ? "GFXPwr, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : ""); + fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", + (msr & 1 << 16) ? "PROCHOT, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 25) ? "GFXPwr, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : ""); + } + if (do_ring_perf_limit_reasons) { + get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); + fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(stderr, " (Active: %s%s%s%s%s%s)", + (msr & 1 << 0) ? "PROCHOT, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : ""); + fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", + (msr & 1 << 16) ? "PROCHOT, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : ""); + } + return 0; +} + #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ @@ -1609,6 +1809,7 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x45: /* HSW */ case 0x46: /* HSW */ case 0x3D: /* BDW */ + case 0x47: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; break; case 0x3F: /* HSX */ @@ -1647,12 +1848,33 @@ void rapl_probe(unsigned int family, unsigned int model) tdp = get_tdp(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; - if (verbose) + if (debug) fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; } +void perf_limit_reasons_probe(family, model) +{ + if (!genuine_intel) + return; + + if (family != 6) + return; + + switch (model) { + case 0x3C: /* HSW */ + case 0x45: /* HSW */ + case 0x46: /* HSW */ + do_gfx_perf_limit_reasons = 1; + case 0x3F: /* HSX */ + do_core_perf_limit_reasons = 1; + do_ring_perf_limit_reasons = 1; + default: + return; + } +} + int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; @@ -1751,7 +1973,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) return -1; - if (verbose) { + if (debug) { fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, rapl_power_units, rapl_energy_units, rapl_time_units); @@ -1808,7 +2030,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) print_power_limit_msr(cpu, msr, "DRAM Limit"); } if (do_rapl & RAPL_CORE_POLICY) { - if (verbose) { + if (debug) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; @@ -1816,7 +2038,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) } } if (do_rapl & RAPL_CORES) { - if (verbose) { + if (debug) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; @@ -1826,7 +2048,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) } } if (do_rapl & RAPL_GFX) { - if (verbose) { + if (debug) { if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; @@ -1842,8 +2064,15 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } +/* + * SNB adds support for additional MSRs: + * + * MSR_PKG_C7_RESIDENCY 0x000003fa + * MSR_CORE_C7_RESIDENCY 0x000003fe + * MSR_PKG_C2_RESIDENCY 0x0000060d + */ -int is_snb(unsigned int family, unsigned int model) +int has_snb_msrs(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; @@ -1858,6 +2087,7 @@ int is_snb(unsigned int family, unsigned int model) case 0x45: /* HSW */ case 0x46: /* HSW */ case 0x3D: /* BDW */ + case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ return 1; @@ -1865,7 +2095,14 @@ int is_snb(unsigned int family, unsigned int model) return 0; } -int has_c8_c9_c10(unsigned int family, unsigned int model) +/* + * HSW adds support for additional MSRs: + * + * MSR_PKG_C8_RESIDENCY 0x00000630 + * MSR_PKG_C9_RESIDENCY 0x00000631 + * MSR_PKG_C10_RESIDENCY 0x00000632 + */ +int has_hsw_msrs(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; @@ -1917,7 +2154,7 @@ double slm_bclk(void) double discover_bclk(unsigned int family, unsigned int model) { - if (is_snb(family, model)) + if (has_snb_msrs(family, model)) return 100.00; else if (is_slm(family, model)) return slm_bclk(); @@ -1965,7 +2202,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk } /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nehalem_platform_info) + if (!do_nhm_platform_info) goto guess; if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) @@ -1973,7 +2210,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local = (msr >> 16) & 0xFF; - if (verbose) + if (debug) fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); @@ -2003,7 +2240,7 @@ void check_cpuid() if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; - if (verbose) + if (debug) fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); @@ -2014,7 +2251,7 @@ void check_cpuid() if (family == 6 || family == 0xf) model += ((fms >> 16) & 0xf) << 4; - if (verbose) + if (debug) fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); @@ -2029,18 +2266,15 @@ void check_cpuid() ebx = ecx = edx = 0; __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); - if (max_level < 0x80000007) - errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level); + if (max_level >= 0x80000007) { - /* - * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 - * this check is valid for both Intel and AMD - */ - __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); - has_invariant_tsc = edx & (1 << 8); - - if (!has_invariant_tsc) - errx(1, "No invariant TSC"); + /* + * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 + * this check is valid for both Intel and AMD + */ + __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + has_invariant_tsc = edx & (1 << 8); + } /* * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 @@ -2053,36 +2287,51 @@ void check_cpuid() do_ptm = eax & (1 << 6); has_epb = ecx & (1 << 3); - if (verbose) - fprintf(stderr, "CPUID(6): %s%s%s%s\n", - has_aperf ? "APERF" : "No APERF!", - do_dts ? ", DTS" : "", - do_ptm ? ", PTM": "", - has_epb ? ", EPB": ""); - - if (!has_aperf) - errx(-1, "No APERF"); - - do_nehalem_platform_info = genuine_intel && has_invariant_tsc; - do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ - do_smi = do_nhm_cstates; - do_snb_cstates = is_snb(family, model); - do_c8_c9_c10 = has_c8_c9_c10(family, model); + if (debug) + fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", + has_aperf ? "" : "No ", + do_dts ? "" : "No ", + do_ptm ? "" : "No ", + has_epb ? "" : "No "); + + do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); + do_snb_cstates = has_snb_msrs(family, model); + do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); + do_pc3 = (pkg_cstate_limit >= PCL__3); + do_pc6 = (pkg_cstate_limit >= PCL__6); + do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); + do_c8_c9_c10 = has_hsw_msrs(family, model); do_slm_cstates = is_slm(family, model); bclk = discover_bclk(family, model); - do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); + do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model); do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); rapl_probe(family, model); + perf_limit_reasons_probe(family, model); return; } -void usage() +void help() { - errx(1, "%s: [-v][-R][-T][-p|-P|-S][-c MSR#][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", - progname); + fprintf(stderr, + "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" + "\n" + "Turbostat forks the specified COMMAND and prints statistics\n" + "when COMMAND completes.\n" + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" + "to print statistics, until interrupted.\n" + "--debug run in \"debug\" mode\n" + "--interval sec Override default 5-second measurement interval\n" + "--help print this help message\n" + "--counter msr print 32-bit counter at address \"msr\"\n" + "--Counter msr print 64-bit Counter at address \"msr\"\n" + "--msr msr print 32-bit value at address \"msr\"\n" + "--MSR msr print 64-bit Value at address \"msr\"\n" + "--version print version information\n" + "\n" + "For more help, run \"man turbostat\"\n"); } @@ -2121,7 +2370,7 @@ void topology_probe() if (!summary_only && topo.num_cpus > 1) show_cpu = 1; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); @@ -2156,7 +2405,7 @@ void topology_probe() int siblings; if (cpu_is_not_present(i)) { - if (verbose > 1) + if (debug > 1) fprintf(stderr, "cpu%d NOT PRESENT\n", i); continue; } @@ -2171,26 +2420,26 @@ void topology_probe() siblings = get_num_ht_siblings(i); if (siblings > max_siblings) max_siblings = siblings; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "cpu %d pkg %d core %d\n", i, cpus[i].physical_package_id, cpus[i].core_id); } topo.num_cores_per_pkg = max_core_id + 1; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); if (!summary_only && topo.num_cores_per_pkg > 1) show_core = 1; topo.num_packages = max_package_id + 1; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); if (!summary_only && topo.num_packages > 1) show_pkg = 1; topo.num_threads_per_core = max_siblings; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "max_siblings %d\n", max_siblings); free(cpus); @@ -2299,25 +2548,27 @@ void setup_all_buffers(void) void turbostat_init() { - check_cpuid(); - check_dev_msr(); - check_super_user(); + check_permissions(); + check_cpuid(); setup_all_buffers(); - if (verbose) + if (debug) print_verbose_header(); - if (verbose) + if (debug) for_all_cpus(print_epb, ODD_COUNTERS); - if (verbose) + if (debug) + for_all_cpus(print_perf_limit, ODD_COUNTERS); + + if (debug) for_all_cpus(print_rapl, ODD_COUNTERS); for_all_cpus(set_temperature_target, ODD_COUNTERS); - if (verbose) + if (debug) for_all_cpus(print_thermal, ODD_COUNTERS); } @@ -2382,56 +2633,82 @@ int get_and_dump_counters(void) return status; } +void print_version() { + fprintf(stderr, "turbostat version 4.1 10-Feb, 2015" + " - Len Brown <lenb@kernel.org>\n"); +} + void cmdline(int argc, char **argv) { int opt; + int option_index = 0; + static struct option long_options[] = { + {"Counter", required_argument, 0, 'C'}, + {"counter", required_argument, 0, 'c'}, + {"Dump", no_argument, 0, 'D'}, + {"debug", no_argument, 0, 'd'}, + {"interval", required_argument, 0, 'i'}, + {"help", no_argument, 0, 'h'}, + {"Joules", no_argument, 0, 'J'}, + {"MSR", required_argument, 0, 'M'}, + {"msr", required_argument, 0, 'm'}, + {"Package", no_argument, 0, 'p'}, + {"processor", no_argument, 0, 'p'}, + {"Summary", no_argument, 0, 'S'}, + {"TCC", required_argument, 0, 'T'}, + {"version", no_argument, 0, 'v' }, + {0, 0, 0, 0 } + }; progname = argv[0]; - while ((opt = getopt(argc, argv, "+pPsSvi:c:C:m:M:RJT:")) != -1) { + while ((opt = getopt_long_only(argc, argv, "C:c:Ddhi:JM:m:PpST:v", + long_options, &option_index)) != -1) { switch (opt) { - case 'p': - show_core_only++; + case 'C': + sscanf(optarg, "%x", &extra_delta_offset64); break; - case 'P': - show_pkg_only++; + case 'c': + sscanf(optarg, "%x", &extra_delta_offset32); break; - case 's': + case 'D': dump_only++; break; - case 'S': - summary_only++; - break; - case 'v': - verbose++; + case 'd': + debug++; break; + case 'h': + default: + help(); + exit(1); case 'i': interval_sec = atoi(optarg); break; - case 'c': - sscanf(optarg, "%x", &extra_delta_offset32); + case 'J': + rapl_joules++; break; - case 'C': - sscanf(optarg, "%x", &extra_delta_offset64); + case 'M': + sscanf(optarg, "%x", &extra_msr_offset64); break; case 'm': sscanf(optarg, "%x", &extra_msr_offset32); break; - case 'M': - sscanf(optarg, "%x", &extra_msr_offset64); + case 'P': + show_pkg_only++; break; - case 'R': - rapl_verbose++; + case 'p': + show_core_only++; + break; + case 'S': + summary_only++; break; case 'T': tcc_activation_temp_override = atoi(optarg); break; - case 'J': - rapl_joules++; + case 'v': + print_version(); + exit(0); break; - - default: - usage(); } } } @@ -2440,9 +2717,8 @@ int main(int argc, char **argv) { cmdline(argc, argv); - if (verbose) - fprintf(stderr, "turbostat v3.7 Feb 6, 2014" - " - Len Brown <lenb@kernel.org>\n"); + if (debug) + print_version(); turbostat_init(); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index b9cd036f0442..d08e214ec6e7 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -178,6 +178,7 @@ my $checkout; my $localversion; my $iteration = 0; my $successes = 0; +my $stty_orig; my $bisect_good; my $bisect_bad; @@ -197,6 +198,11 @@ my $patchcheck_start; my $patchcheck_cherry; my $patchcheck_end; +my $build_time; +my $install_time; +my $reboot_time; +my $test_time; + # set when a test is something other that just building or install # which would require more options. my $buildonly = 1; @@ -554,6 +560,66 @@ sub get_mandatory_config { } } +sub show_time { + my ($time) = @_; + + my $hours = 0; + my $minutes = 0; + + if ($time > 3600) { + $hours = int($time / 3600); + $time -= $hours * 3600; + } + if ($time > 60) { + $minutes = int($time / 60); + $time -= $minutes * 60; + } + + if ($hours > 0) { + doprint "$hours hour"; + doprint "s" if ($hours > 1); + doprint " "; + } + + if ($minutes > 0) { + doprint "$minutes minute"; + doprint "s" if ($minutes > 1); + doprint " "; + } + + doprint "$time second"; + doprint "s" if ($time != 1); +} + +sub print_times { + doprint "\n"; + if ($build_time) { + doprint "Build time: "; + show_time($build_time); + doprint "\n"; + } + if ($install_time) { + doprint "Install time: "; + show_time($install_time); + doprint "\n"; + } + if ($reboot_time) { + doprint "Reboot time: "; + show_time($reboot_time); + doprint "\n"; + } + if ($test_time) { + doprint "Test time: "; + show_time($test_time); + doprint "\n"; + } + # reset for iterations like bisect + $build_time = 0; + $install_time = 0; + $reboot_time = 0; + $test_time = 0; +} + sub get_mandatory_configs { get_mandatory_config("MACHINE"); get_mandatory_config("BUILD_DIR"); @@ -1341,23 +1407,83 @@ sub dodie { print " See $opt{LOG_FILE} for more info.\n"; } + if ($monitor_cnt) { + # restore terminal settings + system("stty $stty_orig"); + } + die @_, "\n"; } +sub create_pty { + my ($ptm, $pts) = @_; + my $tmp; + my $TIOCSPTLCK = 0x40045431; + my $TIOCGPTN = 0x80045430; + + sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or + dodie "Cant open /dev/ptmx"; + + # unlockpt() + $tmp = pack("i", 0); + ioctl($ptm, $TIOCSPTLCK, $tmp) or + dodie "ioctl TIOCSPTLCK for /dev/ptmx failed"; + + # ptsname() + ioctl($ptm, $TIOCGPTN, $tmp) or + dodie "ioctl TIOCGPTN for /dev/ptmx failed"; + $tmp = unpack("i", $tmp); + + sysopen($pts, "/dev/pts/$tmp", O_RDWR | O_NONBLOCK) or + dodie "Can't open /dev/pts/$tmp"; +} + +sub exec_console { + my ($ptm, $pts) = @_; + + close($ptm); + + close(\*STDIN); + close(\*STDOUT); + close(\*STDERR); + + open(\*STDIN, '<&', $pts); + open(\*STDOUT, '>&', $pts); + open(\*STDERR, '>&', $pts); + + close($pts); + + exec $console or + die "Can't open console $console"; +} + sub open_console { - my ($fp) = @_; + my ($ptm) = @_; + my $pts = \*PTSFD; + my $pid; - my $flags; + # save terminal settings + $stty_orig = `stty -g`; - my $pid = open($fp, "$console|") or - dodie "Can't open console $console"; + # place terminal in cbreak mode so that stdin can be read one character at + # a time without having to wait for a newline + system("stty -icanon -echo -icrnl"); - $flags = fcntl($fp, F_GETFL, 0) or - dodie "Can't get flags for the socket: $!"; - $flags = fcntl($fp, F_SETFL, $flags | O_NONBLOCK) or - dodie "Can't set flags for the socket: $!"; + create_pty($ptm, $pts); + + $pid = fork; + + if (!$pid) { + # child + exec_console($ptm, $pts) + } + + # parent + close($pts); return $pid; + + open(PTSFD, "Stop perl from warning about single use of PTSFD"); } sub close_console { @@ -1368,6 +1494,9 @@ sub close_console { print "closing!\n"; close($fp); + + # restore terminal settings + system("stty $stty_orig"); } sub start_monitor { @@ -1519,6 +1648,8 @@ sub fail { $name = " ($test_name)"; } + print_times; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; @@ -1534,10 +1665,14 @@ sub fail { sub run_command { my ($command, $redirect) = @_; + my $start_time; + my $end_time; my $dolog = 0; my $dord = 0; my $pid; + $start_time = time; + $command =~ s/\$SSH_USER/$ssh_user/g; $command =~ s/\$MACHINE/$machine/g; @@ -1570,6 +1705,15 @@ sub run_command { close(LOG) if ($dolog); close(RD) if ($dord); + $end_time = time; + my $delta = $end_time - $start_time; + + if ($delta == 1) { + doprint "[1 second] "; + } else { + doprint "[$delta seconds] "; + } + if ($failed) { doprint "FAILED!\n"; } else { @@ -1694,7 +1838,9 @@ sub wait_for_input { my ($fp, $time) = @_; my $rin; - my $ready; + my $rout; + my $nr; + my $buf; my $line; my $ch; @@ -1704,21 +1850,36 @@ sub wait_for_input $rin = ''; vec($rin, fileno($fp), 1) = 1; - ($ready, $time) = select($rin, undef, undef, $time); + vec($rin, fileno(\*STDIN), 1) = 1; - $line = ""; + while (1) { + $nr = select($rout=$rin, undef, undef, $time); - # try to read one char at a time - while (sysread $fp, $ch, 1) { - $line .= $ch; - last if ($ch eq "\n"); - } + if ($nr <= 0) { + return undef; + } - if (!length($line)) { - return undef; - } + # copy data from stdin to the console + if (vec($rout, fileno(\*STDIN), 1) == 1) { + sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, 1000); + next; + } - return $line; + $line = ""; + + # try to read one char at a time + while (sysread $fp, $ch, 1) { + $line .= $ch; + last if ($ch eq "\n"); + } + + if (!length($line)) { + return undef; + } + + return $line; + } } sub reboot_to { @@ -1766,6 +1927,8 @@ sub monitor { my $skip_call_trace = 0; my $loops; + my $start_time = time; + wait_for_monitor 5; my $line; @@ -1890,6 +2053,9 @@ sub monitor { } } + my $end_time = time; + $reboot_time = $end_time - $start_time; + close(DMESG); if ($bug) { @@ -1938,6 +2104,8 @@ sub install { return if ($no_install); + my $start_time = time; + if (defined($pre_install)) { my $cp_pre_install = eval_kernel_version $pre_install; run_command "$cp_pre_install" or @@ -1969,6 +2137,8 @@ sub install { if (!$install_mods) { do_post_install; doprint "No modules needed\n"; + my $end_time = time; + $install_time = $end_time - $start_time; return; } @@ -1996,6 +2166,9 @@ sub install { run_ssh "rm -f /tmp/$modtar"; do_post_install; + + my $end_time = time; + $install_time = $end_time - $start_time; } sub get_version { @@ -2008,7 +2181,7 @@ sub get_version { $have_version = 1; } -sub start_monitor_and_boot { +sub start_monitor_and_install { # Make sure the stable kernel has finished booting # Install bisects, don't need console @@ -2208,6 +2381,8 @@ sub build { unlink $buildlog; + my $start_time = time; + # Failed builds should not reboot the target my $save_no_reboot = $no_reboot; $no_reboot = 1; @@ -2293,6 +2468,9 @@ sub build { $no_reboot = $save_no_reboot; + my $end_time = time; + $build_time = $end_time - $start_time; + return 1; } @@ -2323,6 +2501,8 @@ sub success { $name = " ($test_name)"; } + print_times; + doprint "\n\n*******************************************\n"; doprint "*******************************************\n"; doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; @@ -2383,6 +2563,8 @@ sub do_run_test { my $bug = 0; my $bug_ignored = 0; + my $start_time = time; + wait_for_monitor 1; doprint "run test $run_test\n"; @@ -2449,6 +2631,9 @@ sub do_run_test { waitpid $child_pid, 0; $child_exit = $?; + my $end_time = time; + $test_time = $end_time - $start_time; + if (!$bug && $in_bisect) { if (defined($bisect_ret_good)) { if ($child_exit == $bisect_ret_good) { @@ -2549,7 +2734,7 @@ sub run_bisect_test { dodie "Failed on build" if $failed; # Now boot the box - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if ($type ne "boot") { if ($failed && $bisect_skip) { @@ -2755,6 +2940,7 @@ sub bisect { do { $result = run_bisect $type; $test = run_git_bisect "git bisect $result"; + print_times; } while ($test); run_command "git bisect log" or @@ -3168,6 +3354,7 @@ sub config_bisect { do { $ret = run_config_bisect \%good_configs, \%bad_configs; + print_times; } while (!$ret); return $ret if ($ret < 0); @@ -3260,7 +3447,7 @@ sub patchcheck { my $sha1 = $item; $sha1 =~ s/^([[:xdigit:]]+).*/$1/; - doprint "\nProcessing commit $item\n\n"; + doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or die "Failed to checkout $sha1"; @@ -3291,16 +3478,18 @@ sub patchcheck { my $failed = 0; - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if (!$failed && $type ne "boot"){ do_run_test or $failed = 1; } end_monitor; - return 0 if ($failed); - + if ($failed) { + print_times; + return 0; + } patchcheck_reboot; - + print_times; } $in_patchcheck = 0; success $i; @@ -3753,7 +3942,7 @@ sub make_min_config { my $failed = 0; build "oldconfig" or $failed = 1; if (!$failed) { - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if ($type eq "test" && !$failed) { do_run_test or $failed = 1; @@ -4000,6 +4189,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $iteration = $i; + $build_time = 0; + $install_time = 0; + $reboot_time = 0; + $test_time = 0; + undef %force_config; my $makecmd = set_test_option("MAKE_CMD", $i); @@ -4157,15 +4351,20 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if ($test_type ne "build") { my $failed = 0; - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if (!$failed && $test_type ne "boot" && defined($run_test)) { do_run_test or $failed = 1; } end_monitor; - next if ($failed); + if ($failed) { + print_times; + next; + } } + print_times; + success $i; } diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index f6ff90a76bd7..1d5e7ad2c460 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -13,7 +13,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR export CC CFLAGS -TARGETS = pmu copyloops mm tm primitives +TARGETS = pmu copyloops mm tm primitives stringloops endif diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore new file mode 100644 index 000000000000..25a192f62c4d --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -0,0 +1,4 @@ +copyuser_64 +copyuser_power7 +memcpy_64 +memcpy_power7 diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore new file mode 100644 index 000000000000..b43ade0ec861 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -0,0 +1,3 @@ +hugetlb_vs_thp_test +subpage_prot +tempfile diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 357ccbd6bad9..a14c538dd7f8 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -1,9 +1,9 @@ noarg: $(MAKE) -C ../ -PROGS := hugetlb_vs_thp_test +PROGS := hugetlb_vs_thp_test subpage_prot -all: $(PROGS) +all: $(PROGS) tempfile $(PROGS): ../harness.c @@ -12,7 +12,10 @@ run_tests: all ./$$PROG; \ done; +tempfile: + dd if=/dev/zero of=tempfile bs=64k count=1 + clean: - rm -f $(PROGS) + rm -f $(PROGS) tempfile .PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c new file mode 100644 index 000000000000..440180ff8089 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -0,0 +1,220 @@ +/* + * Copyright IBM Corp. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <ucontext.h> +#include <unistd.h> + +#include "utils.h" + +char *file_name; + +int in_test; +volatile int faulted; +volatile void *dar; +int errors; + +static void segv(int signum, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + if (!in_test) { + fprintf(stderr, "Segfault outside of test !\n"); + exit(1); + } + + faulted = 1; + dar = (void *)regs->dar; + regs->nip += 4; +} + +static inline void do_read(const volatile void *addr) +{ + int ret; + + asm volatile("lwz %0,0(%1); twi 0,%0,0; isync;\n" + : "=r" (ret) : "r" (addr) : "memory"); +} + +static inline void do_write(const volatile void *addr) +{ + int val = 0x1234567; + + asm volatile("stw %0,0(%1); sync; \n" + : : "r" (val), "r" (addr) : "memory"); +} + +static inline void check_faulted(void *addr, long page, long subpage, int write) +{ + int want_fault = (subpage == ((page + 3) % 16)); + + if (write) + want_fault |= (subpage == ((page + 1) % 16)); + + if (faulted != want_fault) { + printf("Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", + addr, page, subpage, write, + want_fault ? "fault" : "pass", + faulted ? "fault" : "pass"); + ++errors; + } + + if (faulted) { + if (dar != addr) { + printf("Fault expected at 0x%p and happened at 0x%p !\n", + addr, dar); + } + faulted = 0; + asm volatile("sync" : : : "memory"); + } +} + +static int run_test(void *addr, unsigned long size) +{ + unsigned int *map; + long i, j, pages, err; + + pages = size / 0x10000; + map = malloc(pages * 4); + assert(map); + + /* + * for each page, mark subpage i % 16 read only and subpage + * (i + 3) % 16 inaccessible + */ + for (i = 0; i < pages; i++) { + map[i] = (0x40000000 >> (((i + 1) * 2) % 32)) | + (0xc0000000 >> (((i + 3) * 2) % 32)); + } + + err = syscall(__NR_subpage_prot, addr, size, map); + if (err) { + perror("subpage_perm"); + return 1; + } + free(map); + + in_test = 1; + errors = 0; + for (i = 0; i < pages; i++) { + for (j = 0; j < 16; j++, addr += 0x1000) { + do_read(addr); + check_faulted(addr, i, j, 0); + do_write(addr); + check_faulted(addr, i, j, 1); + } + } + + in_test = 0; + if (errors) { + printf("%d errors detected\n", errors); + return 1; + } + + return 0; +} + +int test_anon(void) +{ + unsigned long align; + struct sigaction act = { + .sa_sigaction = segv, + .sa_flags = SA_SIGINFO + }; + void *mallocblock; + unsigned long mallocsize; + + if (getpagesize() != 0x10000) { + fprintf(stderr, "Kernel page size must be 64K!\n"); + return 1; + } + + sigaction(SIGSEGV, &act, NULL); + + mallocsize = 4 * 16 * 1024 * 1024; + + FAIL_IF(posix_memalign(&mallocblock, 64 * 1024, mallocsize)); + + align = (unsigned long)mallocblock; + if (align & 0xffff) + align = (align | 0xffff) + 1; + + mallocblock = (void *)align; + + printf("allocated malloc block of 0x%lx bytes at 0x%p\n", + mallocsize, mallocblock); + + printf("testing malloc block...\n"); + + return run_test(mallocblock, mallocsize); +} + +int test_file(void) +{ + struct sigaction act = { + .sa_sigaction = segv, + .sa_flags = SA_SIGINFO + }; + void *fileblock; + off_t filesize; + int fd; + + fd = open(file_name, O_RDWR); + if (fd == -1) { + perror("failed to open file"); + return 1; + } + sigaction(SIGSEGV, &act, NULL); + + filesize = lseek(fd, 0, SEEK_END); + if (filesize & 0xffff) + filesize &= ~0xfffful; + + fileblock = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (fileblock == MAP_FAILED) { + perror("failed to map file"); + return 1; + } + printf("allocated %s for 0x%lx bytes at 0x%p\n", + file_name, filesize, fileblock); + + printf("testing file map...\n"); + + return run_test(fileblock, filesize); +} + +int main(int argc, char *argv[]) +{ + test_harness(test_anon, "subpage_prot_anon"); + + if (argc > 1) + file_name = argv[1]; + else + file_name = "tempfile"; + + test_harness(test_file, "subpage_prot_file"); + + return 0; +} diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore new file mode 100644 index 000000000000..e748f336eed3 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/.gitignore @@ -0,0 +1,3 @@ +count_instructions +l3_bank_test +per_event_excludes diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore new file mode 100644 index 000000000000..42bddbed8b64 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -0,0 +1,22 @@ +reg_access_test +event_attributes_test +cycles_test +cycles_with_freeze_test +pmc56_overflow_test +ebb_vs_cpu_event_test +cpu_event_vs_ebb_test +cpu_event_pinned_vs_ebb_test +task_event_vs_ebb_test +task_event_pinned_vs_ebb_test +multi_ebb_procs_test +multi_counter_test +pmae_handling_test +close_clears_pmcc_test +instruction_count_test +fork_cleanup_test +ebb_on_child_test +ebb_on_willing_child_test +back_to_back_ebbs_test +lost_exception_test +no_handler_test +cycles_with_mmcr2_test diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore new file mode 100644 index 000000000000..4cc4e31bed1d --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/.gitignore @@ -0,0 +1 @@ +load_unaligned_zeropad diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore new file mode 100644 index 000000000000..0b43da74ee46 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/.gitignore @@ -0,0 +1 @@ +memcmp diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile new file mode 100644 index 000000000000..506d77346477 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -0,0 +1,20 @@ +# The loops are all 64-bit code +CFLAGS += -m64 +CFLAGS += -I$(CURDIR) + +PROGS := memcmp +EXTRA_SOURCES := memcmp_64.S ../harness.c + +all: $(PROGS) + +$(PROGS): $(EXTRA_SOURCES) + +run_tests: all + @-for PROG in $(PROGS); do \ + ./$$PROG; \ + done; + +clean: + rm -f $(PROGS) *.o + +.PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h new file mode 100644 index 000000000000..11bece87e880 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h @@ -0,0 +1,7 @@ +#include <ppc-asm.h> + +#ifndef r1 +#define r1 sp +#endif + +#define _GLOBAL(A) FUNC_START(test_ ## A) diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c new file mode 100644 index 000000000000..17417dd70708 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -0,0 +1,103 @@ +#include <malloc.h> +#include <stdlib.h> +#include <string.h> +#include "../utils.h" + +#define SIZE 256 +#define ITERATIONS 10000 + +int test_memcmp(const void *s1, const void *s2, size_t n); + +/* test all offsets and lengths */ +static void test_one(char *s1, char *s2) +{ + unsigned long offset, size; + + for (offset = 0; offset < SIZE; offset++) { + for (size = 0; size < (SIZE-offset); size++) { + int x, y; + unsigned long i; + + y = memcmp(s1+offset, s2+offset, size); + x = test_memcmp(s1+offset, s2+offset, size); + + if (((x ^ y) < 0) && /* Trick to compare sign */ + ((x | y) != 0)) { /* check for zero */ + printf("memcmp returned %d, should have returned %d (offset %ld size %ld)\n", x, y, offset, size); + + for (i = offset; i < offset+size; i++) + printf("%02x ", s1[i]); + printf("\n"); + + for (i = offset; i < offset+size; i++) + printf("%02x ", s2[i]); + printf("\n"); + abort(); + } + } + } +} + +static int testcase(void) +{ + char *s1; + char *s2; + unsigned long i; + + s1 = memalign(128, SIZE); + if (!s1) { + perror("memalign"); + exit(1); + } + + s2 = memalign(128, SIZE); + if (!s2) { + perror("memalign"); + exit(1); + } + + srandom(1); + + for (i = 0; i < ITERATIONS; i++) { + unsigned long j; + unsigned long change; + + for (j = 0; j < SIZE; j++) + s1[j] = random(); + + memcpy(s2, s1, SIZE); + + /* change one byte */ + change = random() % SIZE; + s2[change] = random() & 0xff; + + test_one(s1, s2); + } + + srandom(1); + + for (i = 0; i < ITERATIONS; i++) { + unsigned long j; + unsigned long change; + + for (j = 0; j < SIZE; j++) + s1[j] = random(); + + memcpy(s2, s1, SIZE); + + /* change multiple bytes, 1/8 of total */ + for (j = 0; j < SIZE / 8; j++) { + change = random() % SIZE; + s2[change] = random() & 0xff; + } + + test_one(s1, s2); + } + + return 0; +} + +int main(void) +{ + return test_harness(testcase, "memcmp"); +} diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_64.S b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S new file mode 120000 index 000000000000..9bc87e438ae9 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/memcmp_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore new file mode 100644 index 000000000000..33d02cc54a3e --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -0,0 +1 @@ +tm-resched-dscr diff --git a/tools/usb/ffs-aio-example/multibuff/host_app/test.c b/tools/usb/ffs-aio-example/multibuff/host_app/test.c index daa3abe6bebd..2cbcce6e8dd7 100644 --- a/tools/usb/ffs-aio-example/multibuff/host_app/test.c +++ b/tools/usb/ffs-aio-example/multibuff/host_app/test.c @@ -33,11 +33,6 @@ #define VENDOR 0x1d6b #define PRODUCT 0x0105 -/* endpoints indexes */ - -#define EP_BULK_IN (1 | LIBUSB_ENDPOINT_IN) -#define EP_BULK_OUT (2 | LIBUSB_ENDPOINT_OUT) - #define BUF_LEN 8192 /* @@ -159,14 +154,21 @@ void test_exit(struct test_state *state) int main(void) { struct test_state state; + struct libusb_config_descriptor *conf; + struct libusb_interface_descriptor const *iface; + unsigned char addr; if (test_init(&state)) return 1; + libusb_get_config_descriptor(state.found, 0, &conf); + iface = &conf->interface[0].altsetting[0]; + addr = iface->endpoint[0].bEndpointAddress; + while (1) { static unsigned char buffer[BUF_LEN]; int bytes; - libusb_bulk_transfer(state.handle, EP_BULK_IN, buffer, BUF_LEN, + libusb_bulk_transfer(state.handle, addr, buffer, BUF_LEN, &bytes, 500); } test_exit(&state); diff --git a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c index adc310a6d489..1f44a29818bf 100644 --- a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c +++ b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c @@ -103,12 +103,14 @@ static const struct { .bDescriptorType = USB_DT_ENDPOINT, .bEndpointAddress = 1 | USB_DIR_IN, .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = htole16(512), }, .bulk_source = { .bLength = sizeof(descriptors.hs_descs.bulk_source), .bDescriptorType = USB_DT_ENDPOINT, .bEndpointAddress = 2 | USB_DIR_OUT, .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = htole16(512), }, }, }; diff --git a/tools/usb/ffs-aio-example/simple/host_app/test.c b/tools/usb/ffs-aio-example/simple/host_app/test.c index acd6332811f3..aed86ffff280 100644 --- a/tools/usb/ffs-aio-example/simple/host_app/test.c +++ b/tools/usb/ffs-aio-example/simple/host_app/test.c @@ -33,11 +33,6 @@ #define VENDOR 0x1d6b #define PRODUCT 0x0105 -/* endpoints indexes */ - -#define EP_BULK_IN (1 | LIBUSB_ENDPOINT_IN) -#define EP_BULK_OUT (2 | LIBUSB_ENDPOINT_OUT) - #define BUF_LEN 8192 /* @@ -159,16 +154,24 @@ void test_exit(struct test_state *state) int main(void) { struct test_state state; + struct libusb_config_descriptor *conf; + struct libusb_interface_descriptor const *iface; + unsigned char in_addr, out_addr; if (test_init(&state)) return 1; + libusb_get_config_descriptor(state.found, 0, &conf); + iface = &conf->interface[0].altsetting[0]; + in_addr = iface->endpoint[0].bEndpointAddress; + out_addr = iface->endpoint[1].bEndpointAddress; + while (1) { static unsigned char buffer[BUF_LEN]; int bytes; - libusb_bulk_transfer(state.handle, EP_BULK_IN, buffer, BUF_LEN, + libusb_bulk_transfer(state.handle, in_addr, buffer, BUF_LEN, &bytes, 500); - libusb_bulk_transfer(state.handle, EP_BULK_OUT, buffer, BUF_LEN, + libusb_bulk_transfer(state.handle, out_addr, buffer, BUF_LEN, &bytes, 500); } test_exit(&state); diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 264fbc297e0b..8bdf16b8ba60 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -133,6 +133,7 @@ static const char * const page_flag_names[] = { [KPF_KSM] = "x:ksm", [KPF_THP] = "t:thp", [KPF_BALLOON] = "o:balloon", + [KPF_ZERO_PAGE] = "z:zero_page", [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", |