diff options
Diffstat (limited to 'tools')
394 files changed, 20337 insertions, 3933 deletions
diff --git a/tools/Makefile b/tools/Makefile index f41e7c6ea23e..6bf68fe7dd29 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -21,6 +21,7 @@ help: @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' @echo ' spi - spi tools' + @echo ' objtool - an ELF object analysis tool' @echo ' tmon - thermal monitoring and tuning tool' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @@ -54,7 +55,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm net iio gpio: FORCE +cgroup firewire hv guest spi usb virtio vm net iio gpio objtool: FORCE $(call descend,$@) liblockdep: FORCE @@ -86,7 +87,7 @@ freefall: FORCE all: acpi cgroup cpupower hv firewire lguest \ perf selftests turbostat usb \ virtio vm net x86_energy_perf_policy \ - tmon freefall + tmon freefall objtool acpi_install: $(call descend,power/$(@:_install=),install) @@ -94,7 +95,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install: +cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install: $(call descend,$(@:_install=),install) selftests_install: @@ -112,7 +113,7 @@ freefall_install: install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install net_install x86_energy_perf_policy_install \ - tmon_install freefall_install + tmon_install freefall_install objtool_install acpi_clean: $(call descend,power/acpi,clean) @@ -120,7 +121,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean lguest_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean: +cgroup_clean hv_clean firewire_clean lguest_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -136,7 +137,8 @@ libsubcmd_clean: $(call descend,lib/subcmd,clean) perf_clean: - $(call descend,$(@:_clean=),clean) + $(Q)mkdir -p $(PERF_O) . + $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean selftests_clean: $(call descend,testing/$(@:_clean=),clean) @@ -157,6 +159,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean lguest_cle perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ - gpio_clean + gpio_clean objtool_clean .PHONY: FORCE diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 6b7707270aa3..57c8f98874e8 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -30,6 +30,7 @@ endef FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ + dwarf_getlocations \ fortify-source \ sync-compare-and-swap \ glibc \ @@ -48,6 +49,10 @@ FEATURE_TESTS_BASIC := \ libslang \ libcrypto \ libunwind \ + libunwind-x86 \ + libunwind-x86_64 \ + libunwind-arm \ + libunwind-aarch64 \ pthread-attr-setaffinity-np \ stackprotector-all \ timerfd \ @@ -68,7 +73,9 @@ FEATURE_TESTS_EXTRA := \ libbabeltrace \ liberty \ liberty-z \ - libunwind-debug-frame + libunwind-debug-frame \ + libunwind-debug-frame-arm \ + libunwind-debug-frame-aarch64 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) @@ -78,6 +85,7 @@ endif FEATURE_DISPLAY ?= \ dwarf \ + dwarf_getlocations \ glibc \ gtk2 \ libaudit \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index c5f4c417428d..3d88f09e188b 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -3,6 +3,7 @@ FILES= \ test-backtrace.bin \ test-bionic.bin \ test-dwarf.bin \ + test-dwarf_getlocations.bin \ test-fortify-source.bin \ test-sync-compare-and-swap.bin \ test-glibc.bin \ @@ -26,6 +27,12 @@ FILES= \ test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ + test-libunwind-x86.bin \ + test-libunwind-x86_64.bin \ + test-libunwind-arm.bin \ + test-libunwind-aarch64.bin \ + test-libunwind-debug-frame-arm.bin \ + test-libunwind-debug-frame-aarch64.bin \ test-pthread-attr-setaffinity-np.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ @@ -82,6 +89,9 @@ endif $(OUTPUT)test-dwarf.bin: $(BUILD) $(DWARFLIBS) +$(OUTPUT)test-dwarf_getlocations.bin: + $(BUILD) $(DWARFLIBS) + $(OUTPUT)test-libelf-mmap.bin: $(BUILD) -lelf @@ -99,6 +109,23 @@ $(OUTPUT)test-libunwind.bin: $(OUTPUT)test-libunwind-debug-frame.bin: $(BUILD) -lelf +$(OUTPUT)test-libunwind-x86.bin: + $(BUILD) -lelf -lunwind-x86 + +$(OUTPUT)test-libunwind-x86_64.bin: + $(BUILD) -lelf -lunwind-x86_64 + +$(OUTPUT)test-libunwind-arm.bin: + $(BUILD) -lelf -lunwind-arm + +$(OUTPUT)test-libunwind-aarch64.bin: + $(BUILD) -lelf -lunwind-aarch64 + +$(OUTPUT)test-libunwind-debug-frame-arm.bin: + $(BUILD) -lelf -lunwind-arm + +$(OUTPUT)test-libunwind-debug-frame-aarch64.bin: + $(BUILD) -lelf -lunwind-aarch64 $(OUTPUT)test-libaudit.bin: $(BUILD) -laudit diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index e499a36c1e4a..a282e8cb84f3 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -41,6 +41,10 @@ # include "test-dwarf.c" #undef main +#define main main_test_dwarf_getlocations +# include "test-dwarf_getlocations.c" +#undef main + #define main main_test_libelf_getphdrnum # include "test-libelf-getphdrnum.c" #undef main @@ -143,6 +147,7 @@ int main(int argc, char *argv[]) main_test_libelf_mmap(); main_test_glibc(); main_test_dwarf(); + main_test_dwarf_getlocations(); main_test_libelf_getphdrnum(); main_test_libunwind(); main_test_libaudit(); diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index b389026839b9..e04ab89a1013 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -27,10 +27,9 @@ int main(void) attr.log_level = 0; attr.kern_version = 0; - attr = attr; /* * Test existence of __NR_bpf and BPF_PROG_LOAD. * This call should fail if we run the testcase. */ - return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(attr)); + return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); } diff --git a/tools/build/feature/test-dwarf_getlocations.c b/tools/build/feature/test-dwarf_getlocations.c new file mode 100644 index 000000000000..70162699dd43 --- /dev/null +++ b/tools/build/feature/test-dwarf_getlocations.c @@ -0,0 +1,12 @@ +#include <stdlib.h> +#include <elfutils/libdw.h> + +int main(void) +{ + Dwarf_Addr base, start, end; + Dwarf_Attribute attr; + Dwarf_Op *op; + size_t nops; + ptrdiff_t offset = 0; + return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); +} diff --git a/tools/build/feature/test-libunwind-aarch64.c b/tools/build/feature/test-libunwind-aarch64.c new file mode 100644 index 000000000000..fc03fb64e8c1 --- /dev/null +++ b/tools/build/feature/test-libunwind-aarch64.c @@ -0,0 +1,26 @@ +#include <libunwind-aarch64.h> +#include <stdlib.h> + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +static unw_accessors_t accessors; + +int main(void) +{ + unw_addr_space_t addr_space; + + addr_space = unw_create_addr_space(&accessors, 0); + if (addr_space) + return 0; + + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + + return 0; +} diff --git a/tools/build/feature/test-libunwind-arm.c b/tools/build/feature/test-libunwind-arm.c new file mode 100644 index 000000000000..632d95ec641f --- /dev/null +++ b/tools/build/feature/test-libunwind-arm.c @@ -0,0 +1,27 @@ +#include <libunwind-arm.h> +#include <stdlib.h> + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +static unw_accessors_t accessors; + +int main(void) +{ + unw_addr_space_t addr_space; + + addr_space = unw_create_addr_space(&accessors, 0); + if (addr_space) + return 0; + + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + + return 0; +} diff --git a/tools/build/feature/test-libunwind-debug-frame-aarch64.c b/tools/build/feature/test-libunwind-debug-frame-aarch64.c new file mode 100644 index 000000000000..22844673fc26 --- /dev/null +++ b/tools/build/feature/test-libunwind-debug-frame-aarch64.c @@ -0,0 +1,16 @@ +#include <libunwind-aarch64.h> +#include <stdlib.h> + +extern int +UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, + unw_word_t ip, unw_word_t segbase, + const char *obj_name, unw_word_t start, + unw_word_t end); + +#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) + +int main(void) +{ + dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0); + return 0; +} diff --git a/tools/build/feature/test-libunwind-debug-frame-arm.c b/tools/build/feature/test-libunwind-debug-frame-arm.c new file mode 100644 index 000000000000..f98859684fee --- /dev/null +++ b/tools/build/feature/test-libunwind-debug-frame-arm.c @@ -0,0 +1,16 @@ +#include <libunwind-arm.h> +#include <stdlib.h> + +extern int +UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, + unw_word_t ip, unw_word_t segbase, + const char *obj_name, unw_word_t start, + unw_word_t end); + +#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) + +int main(void) +{ + dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0); + return 0; +} diff --git a/tools/build/feature/test-libunwind-x86.c b/tools/build/feature/test-libunwind-x86.c new file mode 100644 index 000000000000..3561edce305e --- /dev/null +++ b/tools/build/feature/test-libunwind-x86.c @@ -0,0 +1,27 @@ +#include <libunwind-x86.h> +#include <stdlib.h> + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +static unw_accessors_t accessors; + +int main(void) +{ + unw_addr_space_t addr_space; + + addr_space = unw_create_addr_space(&accessors, 0); + if (addr_space) + return 0; + + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + + return 0; +} diff --git a/tools/build/feature/test-libunwind-x86_64.c b/tools/build/feature/test-libunwind-x86_64.c new file mode 100644 index 000000000000..5add2517b2a1 --- /dev/null +++ b/tools/build/feature/test-libunwind-x86_64.c @@ -0,0 +1,27 @@ +#include <libunwind-x86_64.h> +#include <stdlib.h> + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +static unw_accessors_t accessors; + +int main(void) +{ + unw_addr_space_t addr_space; + + addr_space = unw_create_addr_space(&accessors, 0); + if (addr_space) + return 0; + + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + + return 0; +} diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 4d198d5c4203..c155d6bc47a7 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -1,5 +1,5 @@ CC = $(CROSS_COMPILE)gcc -CFLAGS += -Wall -g -D_GNU_SOURCE +CFLAGS += -O2 -Wall -g -D_GNU_SOURCE all: lsgpio diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index 1124da375942..eb3f56efd215 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -147,7 +147,7 @@ void print_usage(void) int main(int argc, char **argv) { - const char *device_name; + const char *device_name = NULL; int ret; int c; diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index 2218b9add4c1..494c9c615d1c 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -1 +1 @@ -#include <../../../../include/asm-generic/bitops/__fls.h> +#include "../../../../include/asm-generic/bitops/__fls.h" diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index dbf711a28f71..0e4995fa0248 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -1 +1 @@ -#include <../../../../include/asm-generic/bitops/fls.h> +#include "../../../../include/asm-generic/bitops/fls.h" diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index 980b1f63c047..35bee0071e78 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -1 +1 @@ -#include <../../../../include/asm-generic/bitops/fls64.h> +#include "../../../../include/asm-generic/bitops/fls64.h" diff --git a/tools/include/linux/hashtable.h b/tools/include/linux/hashtable.h new file mode 100644 index 000000000000..c65cc0aa2659 --- /dev/null +++ b/tools/include/linux/hashtable.h @@ -0,0 +1,152 @@ +/* + * Statically sized hash table implementation + * (C) 2012 Sasha Levin <levinsasha928@gmail.com> + */ + +#ifndef _LINUX_HASHTABLE_H +#define _LINUX_HASHTABLE_H + +#include <linux/list.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/hash.h> +#include <linux/log2.h> + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#define DEFINE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] = \ + { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + +#define DECLARE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] + +#define HASH_SIZE(name) (ARRAY_SIZE(name)) +#define HASH_BITS(name) ilog2(HASH_SIZE(name)) + +/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ +#define hash_min(val, bits) \ + (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) + +static inline void __hash_init(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + INIT_HLIST_HEAD(&ht[i]); +} + +/** + * hash_init - initialize a hash table + * @hashtable: hashtable to be initialized + * + * Calculates the size of the hashtable from the given parameter, otherwise + * same as hash_init_size. + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_add - add an object to a hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add(hashtable, node, key) \ + hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_hashed - check whether an object is in any hashtable + * @node: the &struct hlist_node of the object to be checked + */ +static inline bool hash_hashed(struct hlist_node *node) +{ + return !hlist_unhashed(node); +} + +static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + if (!hlist_empty(&ht[i])) + return false; + + return true; +} + +/** + * hash_empty - check whether a hashtable is empty + * @hashtable: hashtable to check + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_del - remove an object from a hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del(struct hlist_node *node) +{ + hlist_del_init(node); +} + +/** + * hash_for_each - iterate over a hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, &name[bkt], member) + +/** + * hash_for_each_safe - iterate over a hashtable safe against removal of + * hash entry + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @tmp: a &struct used for temporary storage + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) + +/** + * hash_for_each_possible - iterate over all possible objects hashing to the + * same bucket + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_safe - iterate over all possible objects hashing to the + * same bucket safe against removals + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ + &name[hash_min(key, HASH_BITS(name))], member) + + +#endif diff --git a/tools/include/linux/stringify.h b/tools/include/linux/stringify.h new file mode 100644 index 000000000000..841cec8ed525 --- /dev/null +++ b/tools/include/linux/stringify.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_STRINGIFY_H +#define __LINUX_STRINGIFY_H + +/* Indirect stringification. Doing two levels allows the parameter to be a + * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) + * converts to "bar". + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#endif /* !__LINUX_STRINGIFY_H */ diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 80159e6811c2..d9836c5eb694 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -3351,12 +3351,18 @@ int main(int argc, char *argv[]) /* Boot protocol version: 2.07 supports the fields for lguest. */ boot->hdr.version = 0x207; - /* The hardware_subarch value of "1" tells the Guest it's an lguest. */ - boot->hdr.hardware_subarch = 1; + /* X86_SUBARCH_LGUEST tells the Guest it's an lguest. */ + boot->hdr.hardware_subarch = X86_SUBARCH_LGUEST; /* Tell the entry path not to try to reload segment registers. */ boot->hdr.loadflags |= KEEP_SEGMENTS; + /* We don't support tboot: */ + boot->tboot_addr = 0; + + /* Ensure this is 0 to prevent APM from loading: */ + boot->apm_bios_info.version = 0; + /* We tell the kernel to initialize the Guest. */ tell_kernel(start); diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index bbc82c614bee..316f308a63ea 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../../perf/config/utilities.mak # QUIET_CLEAN +include ../../scripts/utilities.mak # QUIET_CLEAN ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index ef78c22ff44d..08556cf2c70d 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -351,6 +351,19 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep) return err; } +int procfs__read_str(const char *entry, char **buf, size_t *sizep) +{ + char path[PATH_MAX]; + const char *procfs = procfs__mountpoint(); + + if (!procfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", procfs, entry); + + return filename__read_str(path, buf, sizep); +} + int sysfs__read_ull(const char *entry, unsigned long long *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 9f6598098dc5..16c9c2ed7c5b 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -29,6 +29,8 @@ int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); int filename__read_str(const char *filename, char **buf, size_t *sizep); +int procfs__read_str(const char *entry, char **buf, size_t *sizep); + int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh index 5334ad9d39b7..1069d96248c1 100755 --- a/tools/lib/lockdep/run_tests.sh +++ b/tools/lib/lockdep/run_tests.sh @@ -3,7 +3,7 @@ make &> /dev/null for i in `ls tests/*.c`; do - testname=$(basename -s .c "$i") + testname=$(basename "$i" .c) gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null echo -ne "$testname... " if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then @@ -11,11 +11,13 @@ for i in `ls tests/*.c`; do else echo "FAILED!" fi - rm tests/$testname + if [ -f "tests/$testname" ]; then + rm tests/$testname + fi done for i in `ls tests/*.c`; do - testname=$(basename -s .c "$i") + testname=$(basename "$i" .c) gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null echo -ne "(PRELOAD) $testname... " if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then @@ -23,5 +25,7 @@ for i in `ls tests/*.c`; do else echo "FAILED!" fi - rm tests/$testname + if [ -f "tests/$testname" ]; then + rm tests/$testname + fi done diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 629cf8c14e68..a8103700c172 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../../perf/config/utilities.mak # QUIET_CLEAN +include ../../scripts/utilities.mak # QUIET_CLEAN ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -8,8 +8,10 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 'srctree' to be $(srctree)) endif -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar +CC ?= $(CROSS_COMPILE)gcc +LD ?= $(CROSS_COMPILE)ld +AR ?= $(CROSS_COMPILE)ar + RM = rm -f MAKEFLAGS += --no-print-directory diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 190cc886ab91..a8b6357d1ffe 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5427,10 +5427,8 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, } if (pevent->latency_format) { - trace_seq_printf(s, " %3d", record->cpu); pevent_data_lat_fmt(pevent, s, record); - } else - trace_seq_printf(s, " [%03d]", record->cpu); + } if (use_usec_format) { if (pevent->flags & PEVENT_NSEC_OUTPUT) { diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 0144b3d1bb77..88cccea3ca99 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1164,11 +1164,11 @@ process_filter(struct event_format *event, struct filter_arg **parg, current_op = current_exp; ret = collapse_tree(current_op, parg, error_str); + /* collapse_tree() may free current_op, and updates parg accordingly */ + current_op = NULL; if (ret < 0) goto fail; - *parg = current_op; - free(token); return 0; diff --git a/tools/net/bpf_dbg.c b/tools/net/bpf_dbg.c index 9a287bec695a..4f254bcc4423 100644 --- a/tools/net/bpf_dbg.c +++ b/tools/net/bpf_dbg.c @@ -129,16 +129,16 @@ struct bpf_regs { }; static struct sock_filter bpf_image[BPF_MAXINSNS + 1]; -static unsigned int bpf_prog_len = 0; +static unsigned int bpf_prog_len; static int bpf_breakpoints[64]; static struct bpf_regs bpf_regs[BPF_MAXINSNS + 1]; static struct bpf_regs bpf_curr; -static unsigned int bpf_regs_len = 0; +static unsigned int bpf_regs_len; static int pcap_fd = -1; -static unsigned int pcap_packet = 0; -static size_t pcap_map_size = 0; +static unsigned int pcap_packet; +static size_t pcap_map_size; static char *pcap_ptr_va_start, *pcap_ptr_va_curr; static const char * const op_table[] = { @@ -1172,7 +1172,7 @@ static int cmd_breakpoint(char *subcmd) static int cmd_run(char *num) { - static uint32_t pass = 0, fail = 0; + static uint32_t pass, fail; bool has_limit = true; int pkts = 0, i = 0; diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index 7cc72a336645..bd83149e7be0 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -23,6 +23,9 @@ #include <stdio.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> + +#include <linux/filter.h> #include "bpf_exp.yacc.h" @@ -79,22 +82,71 @@ extern void yyerror(const char *str); "txa" { return OP_TXA; } "#"?("len") { return K_PKT_LEN; } -"#"?("proto") { return K_PROTO; } -"#"?("type") { return K_TYPE; } -"#"?("poff") { return K_POFF; } -"#"?("ifidx") { return K_IFIDX; } -"#"?("nla") { return K_NLATTR; } -"#"?("nlan") { return K_NLATTR_NEST; } -"#"?("mark") { return K_MARK; } -"#"?("queue") { return K_QUEUE; } -"#"?("hatype") { return K_HATYPE; } -"#"?("rxhash") { return K_RXHASH; } -"#"?("cpu") { return K_CPU; } -"#"?("vlan_tci") { return K_VLAN_TCI; } -"#"?("vlan_pr") { return K_VLAN_AVAIL; } -"#"?("vlan_avail") { return K_VLAN_AVAIL; } -"#"?("vlan_tpid") { return K_VLAN_TPID; } -"#"?("rand") { return K_RAND; } + +"#"?("proto") { + yylval.number = SKF_AD_PROTOCOL; + return extension; + } +"#"?("type") { + yylval.number = SKF_AD_PKTTYPE; + return extension; + } +"#"?("poff") { + yylval.number = SKF_AD_PAY_OFFSET; + return extension; + } +"#"?("ifidx") { + yylval.number = SKF_AD_IFINDEX; + return extension; + } +"#"?("nla") { + yylval.number = SKF_AD_NLATTR; + return extension; + } +"#"?("nlan") { + yylval.number = SKF_AD_NLATTR_NEST; + return extension; + } +"#"?("mark") { + yylval.number = SKF_AD_MARK; + return extension; + } +"#"?("queue") { + yylval.number = SKF_AD_QUEUE; + return extension; + } +"#"?("hatype") { + yylval.number = SKF_AD_HATYPE; + return extension; + } +"#"?("rxhash") { + yylval.number = SKF_AD_RXHASH; + return extension; + } +"#"?("cpu") { + yylval.number = SKF_AD_CPU; + return extension; + } +"#"?("vlan_tci") { + yylval.number = SKF_AD_VLAN_TAG; + return extension; + } +"#"?("vlan_pr") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_avail") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_tpid") { + yylval.number = SKF_AD_VLAN_TPID; + return extension; + } +"#"?("rand") { + yylval.number = SKF_AD_RANDOM; + return extension; + } ":" { return ':'; } "," { return ','; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index e24eea1b0db5..56ba1de50784 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -35,6 +35,7 @@ enum jmp_type { JTL, JFL, JKL }; extern FILE *yyin; +extern int yylineno; extern int yylex(void); extern void yyerror(const char *str); @@ -55,14 +56,14 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI %token OP_LDXI -%token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLAN_TCI K_VLAN_AVAIL K_VLAN_TPID K_POFF K_RAND +%token K_PKT_LEN %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' -%token number label +%token extension number label %type <label> label +%type <number> extension %type <number> number %% @@ -125,51 +126,9 @@ ldb bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $6); } | OP_LDB '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, $3); } - | OP_LDB K_PROTO { + | OP_LDB extension { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LDB K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LDB K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LDB K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LDB K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LDB K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LDB K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LDB K_HATYPE { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LDB K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LDB K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDB K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDB K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LDB K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LDB K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LDB K_VLAN_TPID { - bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } ; ldh @@ -179,51 +138,9 @@ ldh bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $6); } | OP_LDH '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, $3); } - | OP_LDH K_PROTO { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LDH K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LDH K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LDH K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LDH K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LDH K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LDH K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LDH K_HATYPE { + | OP_LDH extension { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LDH K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LDH K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDH K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDH K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LDH K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LDH K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LDH K_VLAN_TPID { - bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } ; ldi @@ -238,51 +155,9 @@ ld bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); } | OP_LD K_PKT_LEN { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_LEN, 0, 0, 0); } - | OP_LD K_PROTO { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PROTOCOL); } - | OP_LD K_TYPE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PKTTYPE); } - | OP_LD K_IFIDX { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_IFINDEX); } - | OP_LD K_NLATTR { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR); } - | OP_LD K_NLATTR_NEST { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_NLATTR_NEST); } - | OP_LD K_MARK { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_MARK); } - | OP_LD K_QUEUE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_QUEUE); } - | OP_LD K_HATYPE { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_HATYPE); } - | OP_LD K_RXHASH { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RXHASH); } - | OP_LD K_CPU { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_CPU); } - | OP_LD K_VLAN_TCI { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LD K_VLAN_AVAIL { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } - | OP_LD K_POFF { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_PAY_OFFSET); } - | OP_LD K_RAND { - bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_RANDOM); } - | OP_LD K_VLAN_TPID { + | OP_LD extension { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, - SKF_AD_OFF + SKF_AD_VLAN_TPID); } + SKF_AD_OFF + $2); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { @@ -776,5 +651,6 @@ void bpf_asm_compile(FILE *fp, bool cstyle) void yyerror(const char *str) { + fprintf(stderr, "error: %s at line %d\n", str, yylineno); exit(1); } diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index 5b3241340945..544b05a53b70 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -98,6 +98,9 @@ static char *get_klog_buff(unsigned int *klen) char *buff; len = klogctl(CMD_ACTION_SIZE_BUFFER, NULL, 0); + if (len < 0) + return NULL; + buff = malloc(len); if (!buff) return NULL; diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore new file mode 100644 index 000000000000..a0b3128bb31f --- /dev/null +++ b/tools/objtool/.gitignore @@ -0,0 +1,2 @@ +arch/x86/insn/inat-tables.c +objtool diff --git a/tools/objtool/Build b/tools/objtool/Build new file mode 100644 index 000000000000..0e89258a3541 --- /dev/null +++ b/tools/objtool/Build @@ -0,0 +1,13 @@ +objtool-y += arch/$(ARCH)/ +objtool-y += builtin-check.o +objtool-y += elf.o +objtool-y += special.o +objtool-y += objtool.o + +objtool-y += libstring.o + +CFLAGS += -I$(srctree)/tools/lib + +$(OUTPUT)libstring.o: ../lib/string.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt new file mode 100644 index 000000000000..55a60d331f47 --- /dev/null +++ b/tools/objtool/Documentation/stack-validation.txt @@ -0,0 +1,362 @@ +Compile-time stack metadata validation +====================================== + + +Overview +-------- + +The kernel CONFIG_STACK_VALIDATION option enables a host tool named +objtool which runs at compile time. It has a "check" subcommand which +analyzes every .o file and ensures the validity of its stack metadata. +It enforces a set of rules on asm code and C inline assembly code so +that stack traces can be reliable. + +Currently it only checks frame pointer usage, but there are plans to add +CFI validation for C files and CFI generation for asm files. + +For each function, it recursively follows all possible code paths and +validates the correct frame pointer state at each instruction. + +It also follows code paths involving special sections, like +.altinstructions, __jump_table, and __ex_table, which can add +alternative execution paths to a given instruction (or set of +instructions). Similarly, it knows how to follow switch statements, for +which gcc sometimes uses jump tables. + + +Why do we need stack metadata validation? +----------------------------------------- + +Here are some of the benefits of validating stack metadata: + +a) More reliable stack traces for frame pointer enabled kernels + + Frame pointers are used for debugging purposes. They allow runtime + code and debug tools to be able to walk the stack to determine the + chain of function call sites that led to the currently executing + code. + + For some architectures, frame pointers are enabled by + CONFIG_FRAME_POINTER. For some other architectures they may be + required by the ABI (sometimes referred to as "backchain pointers"). + + For C code, gcc automatically generates instructions for setting up + frame pointers when the -fno-omit-frame-pointer option is used. + + But for asm code, the frame setup instructions have to be written by + hand, which most people don't do. So the end result is that + CONFIG_FRAME_POINTER is honored for C code but not for most asm code. + + For stack traces based on frame pointers to be reliable, all + functions which call other functions must first create a stack frame + and update the frame pointer. If a first function doesn't properly + create a stack frame before calling a second function, the *caller* + of the first function will be skipped on the stack trace. + + For example, consider the following example backtrace with frame + pointers enabled: + + [<ffffffff81812584>] dump_stack+0x4b/0x63 + [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30 + [<ffffffff8127f568>] seq_read+0x108/0x3e0 + [<ffffffff812cce62>] proc_reg_read+0x42/0x70 + [<ffffffff81256197>] __vfs_read+0x37/0x100 + [<ffffffff81256b16>] vfs_read+0x86/0x130 + [<ffffffff81257898>] SyS_read+0x58/0xd0 + [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76 + + It correctly shows that the caller of cmdline_proc_show() is + seq_read(). + + If we remove the frame pointer logic from cmdline_proc_show() by + replacing the frame pointer related instructions with nops, here's + what it looks like instead: + + [<ffffffff81812584>] dump_stack+0x4b/0x63 + [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30 + [<ffffffff812cce62>] proc_reg_read+0x42/0x70 + [<ffffffff81256197>] __vfs_read+0x37/0x100 + [<ffffffff81256b16>] vfs_read+0x86/0x130 + [<ffffffff81257898>] SyS_read+0x58/0xd0 + [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76 + + Notice that cmdline_proc_show()'s caller, seq_read(), has been + skipped. Instead the stack trace seems to show that + cmdline_proc_show() was called by proc_reg_read(). + + The benefit of objtool here is that because it ensures that *all* + functions honor CONFIG_FRAME_POINTER, no functions will ever[*] be + skipped on a stack trace. + + [*] unless an interrupt or exception has occurred at the very + beginning of a function before the stack frame has been created, + or at the very end of the function after the stack frame has been + destroyed. This is an inherent limitation of frame pointers. + +b) 100% reliable stack traces for DWARF enabled kernels + + (NOTE: This is not yet implemented) + + As an alternative to frame pointers, DWARF Call Frame Information + (CFI) metadata can be used to walk the stack. Unlike frame pointers, + CFI metadata is out of band. So it doesn't affect runtime + performance and it can be reliable even when interrupts or exceptions + are involved. + + For C code, gcc automatically generates DWARF CFI metadata. But for + asm code, generating CFI is a tedious manual approach which requires + manually placed .cfi assembler macros to be scattered throughout the + code. It's clumsy and very easy to get wrong, and it makes the real + code harder to read. + + Stacktool will improve this situation in several ways. For code + which already has CFI annotations, it will validate them. For code + which doesn't have CFI annotations, it will generate them. So an + architecture can opt to strip out all the manual .cfi annotations + from their asm code and have objtool generate them instead. + + We might also add a runtime stack validation debug option where we + periodically walk the stack from schedule() and/or an NMI to ensure + that the stack metadata is sane and that we reach the bottom of the + stack. + + So the benefit of objtool here will be that external tooling should + always show perfect stack traces. And the same will be true for + kernel warning/oops traces if the architecture has a runtime DWARF + unwinder. + +c) Higher live patching compatibility rate + + (NOTE: This is not yet implemented) + + Currently with CONFIG_LIVEPATCH there's a basic live patching + framework which is safe for roughly 85-90% of "security" fixes. But + patches can't have complex features like function dependency or + prototype changes, or data structure changes. + + There's a strong need to support patches which have the more complex + features so that the patch compatibility rate for security fixes can + eventually approach something resembling 100%. To achieve that, a + "consistency model" is needed, which allows tasks to be safely + transitioned from an unpatched state to a patched state. + + One of the key requirements of the currently proposed livepatch + consistency model [*] is that it needs to walk the stack of each + sleeping task to determine if it can be transitioned to the patched + state. If objtool can ensure that stack traces are reliable, this + consistency model can be used and the live patching compatibility + rate can be improved significantly. + + [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com + + +Rules +----- + +To achieve the validation, objtool enforces the following rules: + +1. Each callable function must be annotated as such with the ELF + function type. In asm code, this is typically done using the + ENTRY/ENDPROC macros. If objtool finds a return instruction + outside of a function, it flags an error since that usually indicates + callable code which should be annotated accordingly. + + This rule is needed so that objtool can properly identify each + callable function in order to analyze its stack metadata. + +2. Conversely, each section of code which is *not* callable should *not* + be annotated as an ELF function. The ENDPROC macro shouldn't be used + in this case. + + This rule is needed so that objtool can ignore non-callable code. + Such code doesn't have to follow any of the other rules. + +3. Each callable function which calls another function must have the + correct frame pointer logic, if required by CONFIG_FRAME_POINTER or + the architecture's back chain rules. This can by done in asm code + with the FRAME_BEGIN/FRAME_END macros. + + This rule ensures that frame pointer based stack traces will work as + designed. If function A doesn't create a stack frame before calling + function B, the _caller_ of function A will be skipped on the stack + trace. + +4. Dynamic jumps and jumps to undefined symbols are only allowed if: + + a) the jump is part of a switch statement; or + + b) the jump matches sibling call semantics and the frame pointer has + the same value it had on function entry. + + This rule is needed so that objtool can reliably analyze all of a + function's code paths. If a function jumps to code in another file, + and it's not a sibling call, objtool has no way to follow the jump + because it only analyzes a single file at a time. + +5. A callable function may not execute kernel entry/exit instructions. + The only code which needs such instructions is kernel entry code, + which shouldn't be be in callable functions anyway. + + This rule is just a sanity check to ensure that callable functions + return normally. + + +Errors in .S files +------------------ + +If you're getting an error in a compiled .S file which you don't +understand, first make sure that the affected code follows the above +rules. + +Here are some examples of common warnings reported by objtool, what +they mean, and suggestions for how to fix them. + + +1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup + + The func() function made a function call without first saving and/or + updating the frame pointer. + + If func() is indeed a callable function, add proper frame pointer + logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, remove + its ELF function annotation by changing ENDPROC to END. + + If you're getting this error in a .c file, see the "Errors in .c + files" section. + + +2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function + + A return instruction was detected, but objtool couldn't find a way + for a callable function to reach the instruction. + + If the return instruction is inside (or reachable from) a callable + function, the function needs to be annotated with the ENTRY/ENDPROC + macros. + + If you _really_ need a return instruction outside of a function, and + are 100% sure that it won't affect stack traces, you can tell + objtool to ignore it. See the "Adding exceptions" section below. + + +3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction + + The instruction lives inside of a callable function, but there's no + possible control flow path from the beginning of the function to the + instruction. + + If the instruction is actually needed, and it's actually in a + callable function, ensure that its function is properly annotated + with ENTRY/ENDPROC. + + If it's not actually in a callable function (e.g. kernel entry code), + change ENDPROC to END. + + +4. asm_file.o: warning: objtool: func(): can't find starting instruction + or + asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction + + Did you put data in a text section? If so, that can confuse + objtool's instruction decoder. Move the data to a more appropriate + section like .data or .rodata. + + +5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction + + This is a kernel entry/exit instruction like sysenter or sysret. + Such instructions aren't allowed in a callable function, and are most + likely part of the kernel entry code. + + If the instruction isn't actually in a callable function, change + ENDPROC to END. + + +6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer + + This is a dynamic jump or a jump to an undefined symbol. Stacktool + assumed it's a sibling call and detected that the frame pointer + wasn't first restored to its original state. + + If it's not really a sibling call, you may need to move the + destination code to the local file. + + If the instruction is not actually in a callable function (e.g. + kernel entry code), change ENDPROC to END. + + +7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch + + The instruction's frame pointer state is inconsistent, depending on + which execution path was taken to reach the instruction. + + Make sure the function pushes and sets up the frame pointer (for + x86_64, this means rbp) at the beginning of the function and pops it + at the end of the function. Also make sure that no other code in the + function touches the frame pointer. + + +Errors in .c files +------------------ + +1. c_file.o: warning: objtool: funcA() falls through to next function funcB() + + This means that funcA() doesn't end with a return instruction or an + unconditional jump, and that objtool has determined that the function + can fall through into the next function. There could be different + reasons for this: + + 1) funcA()'s last instruction is a call to a "noreturn" function like + panic(). In this case the noreturn function needs to be added to + objtool's hard-coded global_noreturns array. Feel free to bug the + objtool maintainer, or you can submit a patch. + + 2) funcA() uses the unreachable() annotation in a section of code + that is actually reachable. + + 3) If funcA() calls an inline function, the object code for funcA() + might be corrupt due to a gcc bug. For more details, see: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646 + +2. If you're getting any other objtool error in a compiled .c file, it + may be because the file uses an asm() statement which has a "call" + instruction. An asm() statement with a call instruction must declare + the use of the stack pointer in its output operand. For example, on + x86_64: + + register void *__sp asm("rsp"); + asm volatile("call func" : "+r" (__sp)); + + Otherwise the stack frame may not get created before the call. + +3. Another possible cause for errors in C code is if the Makefile removes + -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. + +Also see the above section for .S file errors for more information what +the individual error messages mean. + +If the error doesn't seem to make sense, it could be a bug in objtool. +Feel free to ask the objtool maintainer for help. + + +Adding exceptions +----------------- + +If you _really_ need objtool to ignore something, and are 100% sure +that it won't affect kernel stack traces, you can tell objtool to +ignore it: + +- To skip validation of a function, use the STACK_FRAME_NON_STANDARD + macro. + +- To skip validation of a file, add + + OBJECT_FILES_NON_STANDARD_filename.o := n + + to the Makefile. + +- To skip validation of a directory, add + + OBJECT_FILES_NON_STANDARD := y + + to the Makefile. diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile new file mode 100644 index 000000000000..6765c7e949f3 --- /dev/null +++ b/tools/objtool/Makefile @@ -0,0 +1,63 @@ +include ../scripts/Makefile.include + +ifndef ($(ARCH)) +ARCH ?= $(shell uname -m) +ifeq ($(ARCH),x86_64) +ARCH := x86 +endif +endif + +# always use the host compiler +CC = gcc +LD = ld +AR = ar + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +SUBCMD_SRCDIR = $(srctree)/tools/lib/subcmd/ +LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(PWD)/) +LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a + +OBJTOOL := $(OUTPUT)objtool +OBJTOOL_IN := $(OBJTOOL)-in.o + +all: $(OBJTOOL) + +INCLUDES := -I$(srctree)/tools/include +CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) +LDFLAGS += -lelf $(LIBSUBCMD) + +AWK = awk +export srctree OUTPUT CFLAGS ARCH AWK +include $(srctree)/tools/build/Makefile.include + +$(OBJTOOL_IN): fixdep FORCE + @$(MAKE) $(build)=objtool + +$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) + @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \ + diff -I'^#include' arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ + diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ + diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ + diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ + diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ + diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ + diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ + || echo "Warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true + $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ + + +$(LIBSUBCMD): fixdep FORCE + $(Q)$(MAKE) -C $(SUBCMD_SRCDIR) OUTPUT=$(LIBSUBCMD_OUTPUT) + +clean: + $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) + $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep + +FORCE: + +.PHONY: clean FORCE diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h new file mode 100644 index 000000000000..f7350fcedc70 --- /dev/null +++ b/tools/objtool/arch.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _ARCH_H +#define _ARCH_H + +#include <stdbool.h> +#include "elf.h" + +#define INSN_FP_SAVE 1 +#define INSN_FP_SETUP 2 +#define INSN_FP_RESTORE 3 +#define INSN_JUMP_CONDITIONAL 4 +#define INSN_JUMP_UNCONDITIONAL 5 +#define INSN_JUMP_DYNAMIC 6 +#define INSN_CALL 7 +#define INSN_CALL_DYNAMIC 8 +#define INSN_RETURN 9 +#define INSN_CONTEXT_SWITCH 10 +#define INSN_BUG 11 +#define INSN_NOP 12 +#define INSN_OTHER 13 +#define INSN_LAST INSN_OTHER + +int arch_decode_instruction(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, unsigned char *type, + unsigned long *displacement); + +#endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build new file mode 100644 index 000000000000..debbdb0b5c43 --- /dev/null +++ b/tools/objtool/arch/x86/Build @@ -0,0 +1,12 @@ +objtool-y += decode.o + +inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk +inat_tables_maps = arch/x86/insn/x86-opcode-map.txt + +$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c + +CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c new file mode 100644 index 000000000000..c0c0b265e88e --- /dev/null +++ b/tools/objtool/arch/x86/decode.c @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> + +#define unlikely(cond) (cond) +#include "insn/insn.h" +#include "insn/inat.c" +#include "insn/insn.c" + +#include "../../elf.h" +#include "../../arch.h" +#include "../../warn.h" + +static int is_x86_64(struct elf *elf) +{ + switch (elf->ehdr.e_machine) { + case EM_X86_64: + return 1; + case EM_386: + return 0; + default: + WARN("unexpected ELF machine type %d", elf->ehdr.e_machine); + return -1; + } +} + +int arch_decode_instruction(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, unsigned char *type, + unsigned long *immediate) +{ + struct insn insn; + int x86_64; + unsigned char op1, op2, ext; + + x86_64 = is_x86_64(elf); + if (x86_64 == -1) + return -1; + + insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64); + insn_get_length(&insn); + insn_get_opcode(&insn); + insn_get_modrm(&insn); + insn_get_immediate(&insn); + + if (!insn_complete(&insn)) { + WARN_FUNC("can't decode instruction", sec, offset); + return -1; + } + + *len = insn.length; + *type = INSN_OTHER; + + if (insn.vex_prefix.nbytes) + return 0; + + op1 = insn.opcode.bytes[0]; + op2 = insn.opcode.bytes[1]; + + switch (op1) { + case 0x55: + if (!insn.rex_prefix.nbytes) + /* push rbp */ + *type = INSN_FP_SAVE; + break; + + case 0x5d: + if (!insn.rex_prefix.nbytes) + /* pop rbp */ + *type = INSN_FP_RESTORE; + break; + + case 0x70 ... 0x7f: + *type = INSN_JUMP_CONDITIONAL; + break; + + case 0x89: + if (insn.rex_prefix.nbytes == 1 && + insn.rex_prefix.bytes[0] == 0x48 && + insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5) + /* mov rsp, rbp */ + *type = INSN_FP_SETUP; + break; + + case 0x90: + *type = INSN_NOP; + break; + + case 0x0f: + if (op2 >= 0x80 && op2 <= 0x8f) + *type = INSN_JUMP_CONDITIONAL; + else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || + op2 == 0x35) + /* sysenter, sysret */ + *type = INSN_CONTEXT_SWITCH; + else if (op2 == 0x0b || op2 == 0xb9) + /* ud2 */ + *type = INSN_BUG; + else if (op2 == 0x0d || op2 == 0x1f) + /* nopl/nopw */ + *type = INSN_NOP; + else if (op2 == 0x01 && insn.modrm.nbytes && + (insn.modrm.bytes[0] == 0xc2 || + insn.modrm.bytes[0] == 0xd8)) + /* vmlaunch, vmrun */ + *type = INSN_CONTEXT_SWITCH; + + break; + + case 0xc9: /* leave */ + *type = INSN_FP_RESTORE; + break; + + case 0xe3: /* jecxz/jrcxz */ + *type = INSN_JUMP_CONDITIONAL; + break; + + case 0xe9: + case 0xeb: + *type = INSN_JUMP_UNCONDITIONAL; + break; + + case 0xc2: + case 0xc3: + *type = INSN_RETURN; + break; + + case 0xc5: /* iret */ + case 0xca: /* retf */ + case 0xcb: /* retf */ + *type = INSN_CONTEXT_SWITCH; + break; + + case 0xe8: + *type = INSN_CALL; + break; + + case 0xff: + ext = X86_MODRM_REG(insn.modrm.bytes[0]); + if (ext == 2 || ext == 3) + *type = INSN_CALL_DYNAMIC; + else if (ext == 4) + *type = INSN_JUMP_DYNAMIC; + else if (ext == 5) /*jmpf */ + *type = INSN_CONTEXT_SWITCH; + + break; + + default: + break; + } + + *immediate = insn.immediate.nbytes ? insn.immediate.value : 0; + + return 0; +} diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..093a892026f9 --- /dev/null +++ b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk @@ -0,0 +1,387 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[A-Za-z/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[0-9A-Za-z]+" + + imm_expr = "^[IJAOL][a-z]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\((66|!F3)\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" + max_lprefix = 4 + + # All opcodes starting with lower-case 'v' or with (v1) superscript + # accepts VEX prefix + vexok_opcode_expr = "^v.*" + vexok_expr = "\\(v1\\)" + # All opcodes with (v) superscript supports *only* VEX prefix + vexonly_expr = "\\(v\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" + prefix_num["XRELEASE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" + prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(count,opnd, i,j,imm,mod) +{ + imm = null + mod = null + for (j = 1; j <= count; j++) { + i = opnd[j] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + count = split($(i++), opnds, ",") + flags = convert_operands(count, opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX codes + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } + if (!match(ext, lprefix_expr)){ + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} + diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/insn/inat.c new file mode 100644 index 000000000000..e4bf28e6f4c7 --- /dev/null +++ b/tools/objtool/arch/x86/insn/inat.c @@ -0,0 +1,97 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "insn.h" + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_escape_id(esc_attr); + + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_group_id(grp_attr); + + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; + if (!table) + return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } + return table[opcode]; +} + diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h new file mode 100644 index 000000000000..611645e903a8 --- /dev/null +++ b/tools/objtool/arch/x86/insn/inat.h @@ -0,0 +1,221 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "inat_types.h" + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/insn/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/objtool/arch/x86/insn/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/insn/insn.c new file mode 100644 index 000000000000..9f26eae6c9f0 --- /dev/null +++ b/tools/objtool/arch/x86/insn/insn.c @@ -0,0 +1,594 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#ifdef __KERNEL__ +#include <linux/string.h> +#else +#include <string.h> +#endif +#include "inat.h" +#include "insn.h" + +/* Verify next sizeof(t) bytes can be on the same instruction */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) + +#define __get_next(t, insn) \ + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define __peek_nbyte_next(t, insn, n) \ + ({ t r = *(t*)((insn)->next_byte + n); r; }) + +#define get_next(t, insn) \ + ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) + +#define peek_nbyte_next(t, insn, n) \ + ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) + +#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) +{ + /* + * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid + * even if the input buffer is long enough to hold them. + */ + if (buf_len > MAX_INSN_SIZE) + buf_len = MAX_INSN_SIZE; + + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->end_kaddr = kaddr + buf_len; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + /* + * For VEX2, fake VEX3-like byte#2. + * Makes it easier to decode vex.W, vex.vvvv, + * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. + */ + insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + +err_out: + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op; + int pfx_id; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; + +err_out: + return; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; + +err_out: + return; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; + +err_out: + return; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(signed char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; + +err_out: + return; +} + +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->moffset1.got = insn->moffset2.got = 1; + + return 1; + +err_out: + return 0; +} + +/* Decode imm v32(Iz). Return 0 if failed */ +static int __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + + return 1; + +err_out: + return 0; +} + +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/* Decode ptr16:16/32(Ap) */ +static int __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) + goto err_out; + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(signed char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + if (!__get_immptr(insn)) + goto err_out; + break; + case INAT_IMM_VWORD32: + if (!__get_immv32(insn)) + goto err_out; + break; + case INAT_IMM_VWORD: + if (!__get_immv(insn)) + goto err_out; + break; + default: + /* Here, insn must have an immediate, but failed */ + goto err_out; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(signed char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; + +err_out: + return; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/insn/insn.h new file mode 100644 index 000000000000..dd12da0f4593 --- /dev/null +++ b/tools/objtool/arch/x86/insn/insn.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include "inat.h" + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ + const insn_byte_t *next_byte; +}; + +#define MAX_INSN_SIZE 15 + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, + const void *kaddr, int buf_len) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, buf_len, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, buf_len, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt new file mode 100644 index 000000000000..d388de72eaca --- /dev/null +++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt @@ -0,0 +1,984 @@ +# x86 Opcode Maps +# +# This is (mostly) based on following documentations. +# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +# (#326018-047US, June 2013) +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. +# +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +# Note: The May 2011 Intel manual shows Xv for the second parameter of the +# next instruction but Yv is correct +af: SCAS/W/D/Q rAX,Yv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c6: Grp11A Eb,Ib (1A) +c7: Grp11B Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +# in "near" jumps and calls is 16-bit. For CALL, +# push of return address is 16-bit wide, RSP is decremented by 2 +# but is not truncated to 16 bits, unlike RIP. +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) | XACQUIRE (Prefix) +f3: REP/REPE (Prefix) | XRELEASE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +# AMD's prefetch group. Intel supports prefetchw(/1) only. +0d: GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. +# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) +18: Grp16 (1A) +19: +# Intel SDM opcode map does not list MPX instructions. For now using Gv for +# bnd registers and Ev for everything else is OK because the instruction +# decoder does not use the information except as an indication that there is +# a ModR/M byte. +1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev +1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. +77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey +7a: +7b: +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +# 0x0f 0x80-0x8f +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) +86: JBE/JNA Jz (f64) +87: JA/JNBE Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +# 0x0f 0xf0-0xff +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: vcvtph2ps Vx,Wx,Ib (66),(v) +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) +1b: +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) +1f: +# 0x0f 0x38 0x20-0x2f +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) +26: +27: +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) +# 0x0f 0x38 0x30-0x3f +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) +# 0x0f 0x38 0x40-0x8f +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) +# 0x0f 0x38 0x90-0xbf (FMA) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +# 0x0f 0x38 0xc0-0xff +c8: sha1nexte Vdq,Wdq +c9: sha1msg1 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq +cb: sha256rnds2 Vdq,Wdq +cc: sha256msg1 Vdq,Wdq +cd: sha256msg2 Vdq,Wdq +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) +f2: ANDN Gy,By,Ey (v) +f3: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib (66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +cc: sha1rnds4 Vdq,Wdq,Ib +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Mp +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: rdpkru (110),(11B) | wrpkru (111),(11B) +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +3: xrstors +4: xsavec +5: xsaves +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) +EndTable + +GrpTable: Grp10 +EndTable + +# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +GrpTable: Grp11A +0: MOV Eb,Ib +7: XABORT Ib (000),(11B) +EndTable + +GrpTable: Grp11B +0: MOV Eb,Iz +7: XBEGIN Jz (000),(11B) +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +7: vpslldq Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp15 +0: fxsave | RDFSBASE Ry (F3),(11B) +1: fxstor | RDGSBASE Ry (F3),(11B) +2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) +4: XSAVE +5: XRSTOR | lfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) +7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: Grp17 +1: BLSR By,Ey (v) +2: BLSMSK By,Ey (v) +3: BLSI By,Ey (v) +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c new file mode 100644 index 000000000000..e8a1e69eb92c --- /dev/null +++ b/tools/objtool/builtin-check.c @@ -0,0 +1,1206 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * objtool check: + * + * This command analyzes every .o file and ensures the validity of its stack + * trace metadata. It enforces a set of rules on asm code and C inline + * assembly code so that stack traces can be reliable. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. + */ + +#include <string.h> +#include <subcmd/parse-options.h> + +#include "builtin.h" +#include "elf.h" +#include "special.h" +#include "arch.h" +#include "warn.h" + +#include <linux/hashtable.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define STATE_FP_SAVED 0x1 +#define STATE_FP_SETUP 0x2 +#define STATE_FENTRY 0x4 + +struct instruction { + struct list_head list; + struct hlist_node hash; + struct section *sec; + unsigned long offset; + unsigned int len, state; + unsigned char type; + unsigned long immediate; + bool alt_group, visited; + struct symbol *call_dest; + struct instruction *jump_dest; + struct list_head alts; + struct symbol *func; +}; + +struct alternative { + struct list_head list; + struct instruction *insn; +}; + +struct objtool_file { + struct elf *elf; + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 16); + struct section *rodata, *whitelist; + bool ignore_unreachables, c_file; +}; + +const char *objname; +static bool nofp; + +static struct instruction *find_insn(struct objtool_file *file, + struct section *sec, unsigned long offset) +{ + struct instruction *insn; + + hash_for_each_possible(file->insn_hash, insn, hash, offset) + if (insn->sec == sec && insn->offset == offset) + return insn; + + return NULL; +} + +static struct instruction *next_insn_same_sec(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *next = list_next_entry(insn, list); + + if (&next->list == &file->insn_list || next->sec != insn->sec) + return NULL; + + return next; +} + +#define for_each_insn(file, insn) \ + list_for_each_entry(insn, &file->insn_list, list) + +#define func_for_each_insn(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn && &insn->list != &file->insn_list && \ + insn->sec == func->sec && \ + insn->offset < func->offset + func->len; \ + insn = list_next_entry(insn, list)) + +#define sec_for_each_insn_from(file, insn) \ + for (; insn; insn = next_insn_same_sec(file, insn)) + + +/* + * Check if the function has been manually whitelisted with the + * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted + * due to its use of a context switching instruction. + */ +static bool ignore_func(struct objtool_file *file, struct symbol *func) +{ + struct rela *rela; + struct instruction *insn; + + /* check for STACK_FRAME_NON_STANDARD */ + if (file->whitelist && file->whitelist->rela) + list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) + if (rela->sym->sec == func->sec && + rela->addend == func->offset) + return true; + + /* check if it has a context switching instruction */ + func_for_each_insn(file, func, insn) + if (insn->type == INSN_CONTEXT_SWITCH) + return true; + + return false; +} + +/* + * This checks to see if the given function is a "noreturn" function. + * + * For global functions which are outside the scope of this object file, we + * have to keep a manual list of them. + * + * For local functions, we have to detect them manually by simply looking for + * the lack of a return instruction. + * + * Returns: + * -1: error + * 0: no dead end + * 1: dead end + */ +static int __dead_end_function(struct objtool_file *file, struct symbol *func, + int recursion) +{ + int i; + struct instruction *insn; + bool empty = true; + + /* + * Unfortunately these have to be hard coded because the noreturn + * attribute isn't provided in ELF data. + */ + static const char * const global_noreturns[] = { + "__stack_chk_fail", + "panic", + "do_exit", + "__module_put_and_exit", + "complete_and_exit", + "kvm_spurious_fault", + "__reiserfs_panic", + "lbug_with_loc" + }; + + if (func->bind == STB_WEAK) + return 0; + + if (func->bind == STB_GLOBAL) + for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) + if (!strcmp(func->name, global_noreturns[i])) + return 1; + + if (!func->sec) + return 0; + + func_for_each_insn(file, func, insn) { + empty = false; + + if (insn->type == INSN_RETURN) + return 0; + } + + if (empty) + return 0; + + /* + * A function can have a sibling call instead of a return. In that + * case, the function's dead-end status depends on whether the target + * of the sibling call returns. + */ + func_for_each_insn(file, func, insn) { + if (insn->sec != func->sec || + insn->offset >= func->offset + func->len) + break; + + if (insn->type == INSN_JUMP_UNCONDITIONAL) { + struct instruction *dest = insn->jump_dest; + struct symbol *dest_func; + + if (!dest) + /* sibling call to another file */ + return 0; + + if (dest->sec != func->sec || + dest->offset < func->offset || + dest->offset >= func->offset + func->len) { + /* local sibling call */ + dest_func = find_symbol_by_offset(dest->sec, + dest->offset); + if (!dest_func) + continue; + + if (recursion == 5) { + WARN_FUNC("infinite recursion (objtool bug!)", + dest->sec, dest->offset); + return -1; + } + + return __dead_end_function(file, dest_func, + recursion + 1); + } + } + + if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) + /* sibling call */ + return 0; + } + + return 1; +} + +static int dead_end_function(struct objtool_file *file, struct symbol *func) +{ + return __dead_end_function(file, func, 0); +} + +/* + * Call the arch-specific instruction decoder for all the instructions and add + * them to the global instruction list. + */ +static int decode_instructions(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + unsigned long offset; + struct instruction *insn; + int ret; + + list_for_each_entry(sec, &file->elf->sections, list) { + + if (!(sec->sh.sh_flags & SHF_EXECINSTR)) + continue; + + for (offset = 0; offset < sec->len; offset += insn->len) { + insn = malloc(sizeof(*insn)); + memset(insn, 0, sizeof(*insn)); + + INIT_LIST_HEAD(&insn->alts); + insn->sec = sec; + insn->offset = offset; + + ret = arch_decode_instruction(file->elf, sec, offset, + sec->len - offset, + &insn->len, &insn->type, + &insn->immediate); + if (ret) + return ret; + + if (!insn->type || insn->type > INSN_LAST) { + WARN_FUNC("invalid instruction type %d", + insn->sec, insn->offset, insn->type); + return -1; + } + + hash_add(file->insn_hash, &insn->hash, insn->offset); + list_add_tail(&insn->list, &file->insn_list); + } + + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + if (!find_insn(file, sec, func->offset)) { + WARN("%s(): can't find starting instruction", + func->name); + return -1; + } + + func_for_each_insn(file, func, insn) + if (!insn->func) + insn->func = func; + } + } + + return 0; +} + +/* + * Warnings shouldn't be reported for ignored functions. + */ +static void add_ignores(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + struct symbol *func; + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + if (!ignore_func(file, func)) + continue; + + func_for_each_insn(file, func, insn) + insn->visited = true; + } + } +} + +/* + * Find the destination instructions for all jumps. + */ +static int add_jump_destinations(struct objtool_file *file) +{ + struct instruction *insn; + struct rela *rela; + struct section *dest_sec; + unsigned long dest_off; + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; + + /* skip ignores */ + if (insn->visited) + continue; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!rela) { + dest_sec = insn->sec; + dest_off = insn->offset + insn->len + insn->immediate; + } else if (rela->sym->type == STT_SECTION) { + dest_sec = rela->sym->sec; + dest_off = rela->addend + 4; + } else if (rela->sym->sec->idx) { + dest_sec = rela->sym->sec; + dest_off = rela->sym->sym.st_value + rela->addend + 4; + } else { + /* sibling call */ + insn->jump_dest = 0; + continue; + } + + insn->jump_dest = find_insn(file, dest_sec, dest_off); + if (!insn->jump_dest) { + + /* + * This is a special case where an alt instruction + * jumps past the end of the section. These are + * handled later in handle_group_alt(). + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", + insn->sec, insn->offset, dest_sec->name, + dest_off); + return -1; + } + } + + return 0; +} + +/* + * Find the destination instructions for all calls. + */ +static int add_call_destinations(struct objtool_file *file) +{ + struct instruction *insn; + unsigned long dest_off; + struct rela *rela; + + for_each_insn(file, insn) { + if (insn->type != INSN_CALL) + continue; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!rela) { + dest_off = insn->offset + insn->len + insn->immediate; + insn->call_dest = find_symbol_by_offset(insn->sec, + dest_off); + if (!insn->call_dest) { + WARN_FUNC("can't find call dest symbol at offset 0x%lx", + insn->sec, insn->offset, dest_off); + return -1; + } + } else if (rela->sym->type == STT_SECTION) { + insn->call_dest = find_symbol_by_offset(rela->sym->sec, + rela->addend+4); + if (!insn->call_dest || + insn->call_dest->type != STT_FUNC) { + WARN_FUNC("can't find call dest symbol at %s+0x%x", + insn->sec, insn->offset, + rela->sym->sec->name, + rela->addend + 4); + return -1; + } + } else + insn->call_dest = rela->sym; + } + + return 0; +} + +/* + * The .alternatives section requires some extra special care, over and above + * what other special sections require: + * + * 1. Because alternatives are patched in-place, we need to insert a fake jump + * instruction at the end so that validate_branch() skips all the original + * replaced instructions when validating the new instruction path. + * + * 2. An added wrinkle is that the new instruction length might be zero. In + * that case the old instructions are replaced with noops. We simulate that + * by creating a fake jump as the only new instruction. + * + * 3. In some cases, the alternative section includes an instruction which + * conditionally jumps to the _end_ of the entry. We have to modify these + * jumps' destinations to point back to .text rather than the end of the + * entry in .altinstr_replacement. + * + * 4. It has been requested that we don't validate the !POPCNT feature path + * which is a "very very small percentage of machines". + */ +static int handle_group_alt(struct objtool_file *file, + struct special_alt *special_alt, + struct instruction *orig_insn, + struct instruction **new_insn) +{ + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; + unsigned long dest_off; + + last_orig_insn = NULL; + insn = orig_insn; + sec_for_each_insn_from(file, insn) { + if (insn->offset >= special_alt->orig_off + special_alt->orig_len) + break; + + if (special_alt->skip_orig) + insn->type = INSN_NOP; + + insn->alt_group = true; + last_orig_insn = insn; + } + + if (!next_insn_same_sec(file, last_orig_insn)) { + WARN("%s: don't know how to handle alternatives at end of section", + special_alt->orig_sec->name); + return -1; + } + + fake_jump = malloc(sizeof(*fake_jump)); + if (!fake_jump) { + WARN("malloc failed"); + return -1; + } + memset(fake_jump, 0, sizeof(*fake_jump)); + INIT_LIST_HEAD(&fake_jump->alts); + fake_jump->sec = special_alt->new_sec; + fake_jump->offset = -1; + fake_jump->type = INSN_JUMP_UNCONDITIONAL; + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); + + if (!special_alt->new_len) { + *new_insn = fake_jump; + return 0; + } + + last_new_insn = NULL; + insn = *new_insn; + sec_for_each_insn_from(file, insn) { + if (insn->offset >= special_alt->new_off + special_alt->new_len) + break; + + last_new_insn = insn; + + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; + + if (!insn->immediate) + continue; + + dest_off = insn->offset + insn->len + insn->immediate; + if (dest_off == special_alt->new_off + special_alt->new_len) + insn->jump_dest = fake_jump; + + if (!insn->jump_dest) { + WARN_FUNC("can't find alternative jump destination", + insn->sec, insn->offset); + return -1; + } + } + + if (!last_new_insn) { + WARN_FUNC("can't find last new alternative instruction", + special_alt->new_sec, special_alt->new_off); + return -1; + } + + list_add(&fake_jump->list, &last_new_insn->list); + + return 0; +} + +/* + * A jump table entry can either convert a nop to a jump or a jump to a nop. + * If the original instruction is a jump, make the alt entry an effective nop + * by just skipping the original instruction. + */ +static int handle_jump_alt(struct objtool_file *file, + struct special_alt *special_alt, + struct instruction *orig_insn, + struct instruction **new_insn) +{ + if (orig_insn->type == INSN_NOP) + return 0; + + if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { + WARN_FUNC("unsupported instruction at jump label", + orig_insn->sec, orig_insn->offset); + return -1; + } + + *new_insn = list_next_entry(orig_insn, list); + return 0; +} + +/* + * Read all the special sections which have alternate instructions which can be + * patched in or redirected to at runtime. Each instruction having alternate + * instruction(s) has them added to its insn->alts list, which will be + * traversed in validate_branch(). + */ +static int add_special_section_alts(struct objtool_file *file) +{ + struct list_head special_alts; + struct instruction *orig_insn, *new_insn; + struct special_alt *special_alt, *tmp; + struct alternative *alt; + int ret; + + ret = special_get_alts(file->elf, &special_alts); + if (ret) + return ret; + + list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + ret = -1; + goto out; + } + + orig_insn = find_insn(file, special_alt->orig_sec, + special_alt->orig_off); + if (!orig_insn) { + WARN_FUNC("special: can't find orig instruction", + special_alt->orig_sec, special_alt->orig_off); + ret = -1; + goto out; + } + + new_insn = NULL; + if (!special_alt->group || special_alt->new_len) { + new_insn = find_insn(file, special_alt->new_sec, + special_alt->new_off); + if (!new_insn) { + WARN_FUNC("special: can't find new instruction", + special_alt->new_sec, + special_alt->new_off); + ret = -1; + goto out; + } + } + + if (special_alt->group) { + ret = handle_group_alt(file, special_alt, orig_insn, + &new_insn); + if (ret) + goto out; + } else if (special_alt->jump_or_nop) { + ret = handle_jump_alt(file, special_alt, orig_insn, + &new_insn); + if (ret) + goto out; + } + + alt->insn = new_insn; + list_add_tail(&alt->list, &orig_insn->alts); + + list_del(&special_alt->list); + free(special_alt); + } + +out: + return ret; +} + +static int add_switch_table(struct objtool_file *file, struct symbol *func, + struct instruction *insn, struct rela *table, + struct rela *next_table) +{ + struct rela *rela = table; + struct instruction *alt_insn; + struct alternative *alt; + + list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { + if (rela == next_table) + break; + + if (rela->sym->sec != insn->sec || + rela->addend <= func->offset || + rela->addend >= func->offset + func->len) + break; + + alt_insn = find_insn(file, insn->sec, rela->addend); + if (!alt_insn) { + WARN("%s: can't find instruction at %s+0x%x", + file->rodata->rela->name, insn->sec->name, + rela->addend); + return -1; + } + + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + return -1; + } + + alt->insn = alt_insn; + list_add_tail(&alt->list, &insn->alts); + } + + return 0; +} + +static int add_func_switch_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn, *prev_jump; + struct rela *text_rela, *rodata_rela, *prev_rela; + int ret; + + prev_jump = NULL; + + func_for_each_insn(file, func, insn) { + if (insn->type != INSN_JUMP_DYNAMIC) + continue; + + text_rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* common case: jmpq *[addr](,%rax,8) */ + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend); + + /* + * rare case: jmpq *[addr](%rip) + * + * This check is for a rare gcc quirk, currently only seen in + * three driver functions in the kernel, only with certain + * obscure non-distro configs. + * + * As part of an optimization, gcc makes a copy of an existing + * switch jump table, modifies it, and then hard-codes the jump + * (albeit with an indirect jump) to use a single entry in the + * table. The rest of the jump table and some of its jump + * targets remain as dead code. + * + * In such a case we can just crudely ignore all unreachable + * instruction warnings for the entire object file. Ideally we + * would just ignore them for the function, but that would + * require redesigning the code quite a bit. And honestly + * that's just not worth doing: unreachable instruction + * warnings are of questionable value anyway, and this is such + * a rare issue. + * + * kbuild reports: + * - https://lkml.kernel.org/r/201603231906.LWcVUpxm%25fengguang.wu@intel.com + * - https://lkml.kernel.org/r/201603271114.K9i45biy%25fengguang.wu@intel.com + * - https://lkml.kernel.org/r/201603291058.zuJ6ben1%25fengguang.wu@intel.com + * + * gcc bug: + * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70604 + */ + if (!rodata_rela) { + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend + 4); + if (rodata_rela) + file->ignore_unreachables = true; + } + + if (!rodata_rela) + continue; + + /* + * We found a switch table, but we don't know yet how big it + * is. Don't add it until we reach the end of the function or + * the beginning of another switch table in the same function. + */ + if (prev_jump) { + ret = add_switch_table(file, func, prev_jump, prev_rela, + rodata_rela); + if (ret) + return ret; + } + + prev_jump = insn; + prev_rela = rodata_rela; + } + + if (prev_jump) { + ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); + if (ret) + return ret; + } + + return 0; +} + +/* + * For some switch statements, gcc generates a jump table in the .rodata + * section which contains a list of addresses within the function to jump to. + * This finds these jump tables and adds them to the insn->alts lists. + */ +static int add_switch_table_alts(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + int ret; + + if (!file->rodata || !file->rodata->rela) + return 0; + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + ret = add_func_switch_tables(file, func); + if (ret) + return ret; + } + } + + return 0; +} + +static int decode_sections(struct objtool_file *file) +{ + int ret; + + ret = decode_instructions(file); + if (ret) + return ret; + + add_ignores(file); + + ret = add_jump_destinations(file); + if (ret) + return ret; + + ret = add_call_destinations(file); + if (ret) + return ret; + + ret = add_special_section_alts(file); + if (ret) + return ret; + + ret = add_switch_table_alts(file); + if (ret) + return ret; + + return 0; +} + +static bool is_fentry_call(struct instruction *insn) +{ + if (insn->type == INSN_CALL && + insn->call_dest->type == STT_NOTYPE && + !strcmp(insn->call_dest->name, "__fentry__")) + return true; + + return false; +} + +static bool has_modified_stack_frame(struct instruction *insn) +{ + return (insn->state & STATE_FP_SAVED) || + (insn->state & STATE_FP_SETUP); +} + +static bool has_valid_stack_frame(struct instruction *insn) +{ + return (insn->state & STATE_FP_SAVED) && + (insn->state & STATE_FP_SETUP); +} + +static unsigned int frame_state(unsigned long state) +{ + return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); +} + +/* + * Follow the branch starting at the given instruction, and recursively follow + * any other branches (jumps). Meanwhile, track the frame pointer state at + * each instruction and validate all the rules described in + * tools/objtool/Documentation/stack-validation.txt. + */ +static int validate_branch(struct objtool_file *file, + struct instruction *first, unsigned char first_state) +{ + struct alternative *alt; + struct instruction *insn; + struct section *sec; + struct symbol *func = NULL; + unsigned char state; + int ret; + + insn = first; + sec = insn->sec; + state = first_state; + + if (insn->alt_group && list_empty(&insn->alts)) { + WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", + sec, insn->offset); + return 1; + } + + while (1) { + if (file->c_file && insn->func) { + if (func && func != insn->func) { + WARN("%s() falls through to next function %s()", + func->name, insn->func->name); + return 1; + } + + func = insn->func; + } + + if (insn->visited) { + if (frame_state(insn->state) != frame_state(state)) { + WARN_FUNC("frame pointer state mismatch", + sec, insn->offset); + return 1; + } + + return 0; + } + + insn->visited = true; + insn->state = state; + + list_for_each_entry(alt, &insn->alts, list) { + ret = validate_branch(file, alt->insn, state); + if (ret) + return 1; + } + + switch (insn->type) { + + case INSN_FP_SAVE: + if (!nofp) { + if (state & STATE_FP_SAVED) { + WARN_FUNC("duplicate frame pointer save", + sec, insn->offset); + return 1; + } + state |= STATE_FP_SAVED; + } + break; + + case INSN_FP_SETUP: + if (!nofp) { + if (state & STATE_FP_SETUP) { + WARN_FUNC("duplicate frame pointer setup", + sec, insn->offset); + return 1; + } + state |= STATE_FP_SETUP; + } + break; + + case INSN_FP_RESTORE: + if (!nofp) { + if (has_valid_stack_frame(insn)) + state &= ~STATE_FP_SETUP; + + state &= ~STATE_FP_SAVED; + } + break; + + case INSN_RETURN: + if (!nofp && has_modified_stack_frame(insn)) { + WARN_FUNC("return without frame pointer restore", + sec, insn->offset); + return 1; + } + return 0; + + case INSN_CALL: + if (is_fentry_call(insn)) { + state |= STATE_FENTRY; + break; + } + + ret = dead_end_function(file, insn->call_dest); + if (ret == 1) + return 0; + if (ret == -1) + return 1; + + /* fallthrough */ + case INSN_CALL_DYNAMIC: + if (!nofp && !has_valid_stack_frame(insn)) { + WARN_FUNC("call without frame pointer save/setup", + sec, insn->offset); + return 1; + } + break; + + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: + if (insn->jump_dest) { + ret = validate_branch(file, insn->jump_dest, + state); + if (ret) + return 1; + } else if (has_modified_stack_frame(insn)) { + WARN_FUNC("sibling call from callable instruction with changed frame pointer", + sec, insn->offset); + return 1; + } /* else it's a sibling call */ + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; + + break; + + case INSN_JUMP_DYNAMIC: + if (list_empty(&insn->alts) && + has_modified_stack_frame(insn)) { + WARN_FUNC("sibling call from callable instruction with changed frame pointer", + sec, insn->offset); + return 1; + } + + return 0; + + case INSN_BUG: + return 0; + + default: + break; + } + + insn = next_insn_same_sec(file, insn); + if (!insn) { + WARN("%s: unexpected end of section", sec->name); + return 1; + } + } + + return 0; +} + +static bool is_gcov_insn(struct instruction *insn) +{ + struct rela *rela; + struct section *sec; + struct symbol *sym; + unsigned long offset; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); + if (!rela) + return false; + + if (rela->sym->type != STT_SECTION) + return false; + + sec = rela->sym->sec; + offset = rela->addend + insn->offset + insn->len - rela->offset; + + list_for_each_entry(sym, &sec->symbol_list, list) { + if (sym->type != STT_OBJECT) + continue; + + if (offset >= sym->offset && offset < sym->offset + sym->len) + return (!memcmp(sym->name, "__gcov0.", 8)); + } + + return false; +} + +static bool is_kasan_insn(struct instruction *insn) +{ + return (insn->type == INSN_CALL && + !strcmp(insn->call_dest->name, "__asan_handle_no_return")); +} + +static bool is_ubsan_insn(struct instruction *insn) +{ + return (insn->type == INSN_CALL && + !strcmp(insn->call_dest->name, + "__ubsan_handle_builtin_unreachable")); +} + +static bool ignore_unreachable_insn(struct symbol *func, + struct instruction *insn) +{ + int i; + + if (insn->type == INSN_NOP) + return true; + + if (is_gcov_insn(insn)) + return true; + + /* + * Check if this (or a subsequent) instruction is related to + * CONFIG_UBSAN or CONFIG_KASAN. + * + * End the search at 5 instructions to avoid going into the weeds. + */ + for (i = 0; i < 5; i++) { + + if (is_kasan_insn(insn) || is_ubsan_insn(insn)) + return true; + + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { + insn = insn->jump_dest; + continue; + } + + if (insn->offset + insn->len >= func->offset + func->len) + break; + insn = list_next_entry(insn, list); + } + + return false; +} + +static int validate_functions(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + struct instruction *insn; + int ret, warnings = 0; + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + insn = find_insn(file, sec, func->offset); + if (!insn) + continue; + + ret = validate_branch(file, insn, 0); + warnings += ret; + } + } + + list_for_each_entry(sec, &file->elf->sections, list) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + func_for_each_insn(file, func, insn) { + if (insn->visited) + continue; + + insn->visited = true; + + if (file->ignore_unreachables || warnings || + ignore_unreachable_insn(func, insn)) + continue; + + WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); + warnings++; + } + } + } + + return warnings; +} + +static int validate_uncallable_instructions(struct objtool_file *file) +{ + struct instruction *insn; + int warnings = 0; + + for_each_insn(file, insn) { + if (!insn->visited && insn->type == INSN_RETURN) { + WARN_FUNC("return instruction outside of a callable function", + insn->sec, insn->offset); + warnings++; + } + } + + return warnings; +} + +static void cleanup(struct objtool_file *file) +{ + struct instruction *insn, *tmpinsn; + struct alternative *alt, *tmpalt; + + list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { + list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { + list_del(&alt->list); + free(alt); + } + list_del(&insn->list); + hash_del(&insn->hash); + free(insn); + } + elf_close(file->elf); +} + +const char * const check_usage[] = { + "objtool check [<options>] file.o", + NULL, +}; + +int cmd_check(int argc, const char **argv) +{ + struct objtool_file file; + int ret, warnings = 0; + + const struct option options[] = { + OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), + OPT_END(), + }; + + argc = parse_options(argc, argv, options, check_usage, 0); + + if (argc != 1) + usage_with_options(check_usage, options); + + objname = argv[0]; + + file.elf = elf_open(objname); + if (!file.elf) { + fprintf(stderr, "error reading elf file %s\n", objname); + return 1; + } + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); + file.rodata = find_section_by_name(file.elf, ".rodata"); + file.ignore_unreachables = false; + file.c_file = find_section_by_name(file.elf, ".comment"); + + ret = decode_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + + ret = validate_functions(&file); + if (ret < 0) + goto out; + warnings += ret; + + ret = validate_uncallable_instructions(&file); + if (ret < 0) + goto out; + warnings += ret; + +out: + cleanup(&file); + + /* ignore warnings for now until we get all the code cleaned up */ + if (ret || warnings) + return 0; + return 0; +} diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h new file mode 100644 index 000000000000..34d2ba78a616 --- /dev/null +++ b/tools/objtool/builtin.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef _BUILTIN_H +#define _BUILTIN_H + +extern int cmd_check(int argc, const char **argv); + +#endif /* _BUILTIN_H */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c new file mode 100644 index 000000000000..e11f6b69cce6 --- /dev/null +++ b/tools/objtool/elf.c @@ -0,0 +1,412 @@ +/* + * elf.c - ELF access library + * + * Adapted from kpatch (https://github.com/dynup/kpatch): + * Copyright (C) 2013-2015 Josh Poimboeuf <jpoimboe@redhat.com> + * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "elf.h" +#include "warn.h" + +struct section *find_section_by_name(struct elf *elf, const char *name) +{ + struct section *sec; + + list_for_each_entry(sec, &elf->sections, list) + if (!strcmp(sec->name, name)) + return sec; + + return NULL; +} + +static struct section *find_section_by_index(struct elf *elf, + unsigned int idx) +{ + struct section *sec; + + list_for_each_entry(sec, &elf->sections, list) + if (sec->idx == idx) + return sec; + + return NULL; +} + +static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) +{ + struct section *sec; + struct symbol *sym; + + list_for_each_entry(sec, &elf->sections, list) + hash_for_each_possible(sec->symbol_hash, sym, hash, idx) + if (sym->idx == idx) + return sym; + + return NULL; +} + +struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type != STT_SECTION && + sym->offset == offset) + return sym; + + return NULL; +} + +struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, + unsigned int len) +{ + struct rela *rela; + unsigned long o; + + if (!sec->rela) + return NULL; + + for (o = offset; o < offset + len; o++) + hash_for_each_possible(sec->rela->rela_hash, rela, hash, o) + if (rela->offset == o) + return rela; + + return NULL; +} + +struct rela *find_rela_by_dest(struct section *sec, unsigned long offset) +{ + return find_rela_by_dest_range(sec, offset, 1); +} + +struct symbol *find_containing_func(struct section *sec, unsigned long offset) +{ + struct symbol *func; + + list_for_each_entry(func, &sec->symbol_list, list) + if (func->type == STT_FUNC && offset >= func->offset && + offset < func->offset + func->len) + return func; + + return NULL; +} + +static int read_sections(struct elf *elf) +{ + Elf_Scn *s = NULL; + struct section *sec; + size_t shstrndx, sections_nr; + int i; + + if (elf_getshdrnum(elf->elf, §ions_nr)) { + perror("elf_getshdrnum"); + return -1; + } + + if (elf_getshdrstrndx(elf->elf, &shstrndx)) { + perror("elf_getshdrstrndx"); + return -1; + } + + for (i = 0; i < sections_nr; i++) { + sec = malloc(sizeof(*sec)); + if (!sec) { + perror("malloc"); + return -1; + } + memset(sec, 0, sizeof(*sec)); + + INIT_LIST_HEAD(&sec->symbol_list); + INIT_LIST_HEAD(&sec->rela_list); + hash_init(sec->rela_hash); + hash_init(sec->symbol_hash); + + list_add_tail(&sec->list, &elf->sections); + + s = elf_getscn(elf->elf, i); + if (!s) { + perror("elf_getscn"); + return -1; + } + + sec->idx = elf_ndxscn(s); + + if (!gelf_getshdr(s, &sec->sh)) { + perror("gelf_getshdr"); + return -1; + } + + sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name); + if (!sec->name) { + perror("elf_strptr"); + return -1; + } + + sec->elf_data = elf_getdata(s, NULL); + if (!sec->elf_data) { + perror("elf_getdata"); + return -1; + } + + if (sec->elf_data->d_off != 0 || + sec->elf_data->d_size != sec->sh.sh_size) { + WARN("unexpected data attributes for %s", sec->name); + return -1; + } + + sec->data = (unsigned long)sec->elf_data->d_buf; + sec->len = sec->elf_data->d_size; + } + + /* sanity check, one more call to elf_nextscn() should return NULL */ + if (elf_nextscn(elf->elf, s)) { + WARN("section entry mismatch"); + return -1; + } + + return 0; +} + +static int read_symbols(struct elf *elf) +{ + struct section *symtab; + struct symbol *sym; + struct list_head *entry, *tmp; + int symbols_nr, i; + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + WARN("missing symbol table"); + return -1; + } + + symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize; + + for (i = 0; i < symbols_nr; i++) { + sym = malloc(sizeof(*sym)); + if (!sym) { + perror("malloc"); + return -1; + } + memset(sym, 0, sizeof(*sym)); + + sym->idx = i; + + if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) { + perror("gelf_getsym"); + goto err; + } + + sym->name = elf_strptr(elf->elf, symtab->sh.sh_link, + sym->sym.st_name); + if (!sym->name) { + perror("elf_strptr"); + goto err; + } + + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + + if (sym->sym.st_shndx > SHN_UNDEF && + sym->sym.st_shndx < SHN_LORESERVE) { + sym->sec = find_section_by_index(elf, + sym->sym.st_shndx); + if (!sym->sec) { + WARN("couldn't find section for symbol %s", + sym->name); + goto err; + } + if (sym->type == STT_SECTION) { + sym->name = sym->sec->name; + sym->sec->sym = sym; + } + } else + sym->sec = find_section_by_index(elf, 0); + + sym->offset = sym->sym.st_value; + sym->len = sym->sym.st_size; + + /* sorted insert into a per-section list */ + entry = &sym->sec->symbol_list; + list_for_each_prev(tmp, &sym->sec->symbol_list) { + struct symbol *s; + + s = list_entry(tmp, struct symbol, list); + + if (sym->offset > s->offset) { + entry = tmp; + break; + } + + if (sym->offset == s->offset && sym->len >= s->len) { + entry = tmp; + break; + } + } + list_add(&sym->list, entry); + hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); + } + + return 0; + +err: + free(sym); + return -1; +} + +static int read_relas(struct elf *elf) +{ + struct section *sec; + struct rela *rela; + int i; + unsigned int symndx; + + list_for_each_entry(sec, &elf->sections, list) { + if (sec->sh.sh_type != SHT_RELA) + continue; + + sec->base = find_section_by_name(elf, sec->name + 5); + if (!sec->base) { + WARN("can't find base section for rela section %s", + sec->name); + return -1; + } + + sec->base->rela = sec; + + for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) { + rela = malloc(sizeof(*rela)); + if (!rela) { + perror("malloc"); + return -1; + } + memset(rela, 0, sizeof(*rela)); + + if (!gelf_getrela(sec->elf_data, i, &rela->rela)) { + perror("gelf_getrela"); + return -1; + } + + rela->type = GELF_R_TYPE(rela->rela.r_info); + rela->addend = rela->rela.r_addend; + rela->offset = rela->rela.r_offset; + symndx = GELF_R_SYM(rela->rela.r_info); + rela->sym = find_symbol_by_index(elf, symndx); + if (!rela->sym) { + WARN("can't find rela entry symbol %d for %s", + symndx, sec->name); + return -1; + } + + list_add_tail(&rela->list, &sec->rela_list); + hash_add(sec->rela_hash, &rela->hash, rela->offset); + + } + } + + return 0; +} + +struct elf *elf_open(const char *name) +{ + struct elf *elf; + + elf_version(EV_CURRENT); + + elf = malloc(sizeof(*elf)); + if (!elf) { + perror("malloc"); + return NULL; + } + memset(elf, 0, sizeof(*elf)); + + INIT_LIST_HEAD(&elf->sections); + + elf->name = strdup(name); + if (!elf->name) { + perror("strdup"); + goto err; + } + + elf->fd = open(name, O_RDONLY); + if (elf->fd == -1) { + perror("open"); + goto err; + } + + elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL); + if (!elf->elf) { + perror("elf_begin"); + goto err; + } + + if (!gelf_getehdr(elf->elf, &elf->ehdr)) { + perror("gelf_getehdr"); + goto err; + } + + if (read_sections(elf)) + goto err; + + if (read_symbols(elf)) + goto err; + + if (read_relas(elf)) + goto err; + + return elf; + +err: + elf_close(elf); + return NULL; +} + +void elf_close(struct elf *elf) +{ + struct section *sec, *tmpsec; + struct symbol *sym, *tmpsym; + struct rela *rela, *tmprela; + + list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) { + list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) { + list_del(&sym->list); + hash_del(&sym->hash); + free(sym); + } + list_for_each_entry_safe(rela, tmprela, &sec->rela_list, list) { + list_del(&rela->list); + hash_del(&rela->hash); + free(rela); + } + list_del(&sec->list); + free(sec); + } + if (elf->name) + free(elf->name); + if (elf->fd > 0) + close(elf->fd); + if (elf->elf) + elf_end(elf->elf); + free(elf); +} diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h new file mode 100644 index 000000000000..7f3e00a2f907 --- /dev/null +++ b/tools/objtool/elf.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _OBJTOOL_ELF_H +#define _OBJTOOL_ELF_H + +#include <stdio.h> +#include <gelf.h> +#include <linux/list.h> +#include <linux/hashtable.h> + +struct section { + struct list_head list; + GElf_Shdr sh; + struct list_head symbol_list; + DECLARE_HASHTABLE(symbol_hash, 8); + struct list_head rela_list; + DECLARE_HASHTABLE(rela_hash, 16); + struct section *base, *rela; + struct symbol *sym; + Elf_Data *elf_data; + char *name; + int idx; + unsigned long data; + unsigned int len; +}; + +struct symbol { + struct list_head list; + struct hlist_node hash; + GElf_Sym sym; + struct section *sec; + char *name; + unsigned int idx; + unsigned char bind, type; + unsigned long offset; + unsigned int len; +}; + +struct rela { + struct list_head list; + struct hlist_node hash; + GElf_Rela rela; + struct symbol *sym; + unsigned int type; + unsigned long offset; + int addend; +}; + +struct elf { + Elf *elf; + GElf_Ehdr ehdr; + int fd; + char *name; + struct list_head sections; + DECLARE_HASHTABLE(rela_hash, 16); +}; + + +struct elf *elf_open(const char *name); +struct section *find_section_by_name(struct elf *elf, const char *name); +struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); +struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, + unsigned int len); +struct symbol *find_containing_func(struct section *sec, unsigned long offset); +void elf_close(struct elf *elf); + + + +#endif /* _OBJTOOL_ELF_H */ diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c new file mode 100644 index 000000000000..46c326db4f46 --- /dev/null +++ b/tools/objtool/objtool.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * objtool: + * + * The 'check' subcmd analyzes every .o file and ensures the validity of its + * stack trace metadata. It enforces a set of rules on asm code and C inline + * assembly code so that stack traces can be reliable. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. + */ + +#include <stdio.h> +#include <stdbool.h> +#include <string.h> +#include <stdlib.h> +#include <subcmd/exec-cmd.h> +#include <subcmd/pager.h> + +#include "builtin.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +struct cmd_struct { + const char *name; + int (*fn)(int, const char **); + const char *help; +}; + +static const char objtool_usage_string[] = + "objtool [OPTIONS] COMMAND [ARGS]"; + +static struct cmd_struct objtool_cmds[] = { + {"check", cmd_check, "Perform stack metadata validation on an object file" }, +}; + +bool help; + +static void cmd_usage(void) +{ + unsigned int i, longest = 0; + + printf("\n usage: %s\n\n", objtool_usage_string); + + for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { + if (longest < strlen(objtool_cmds[i].name)) + longest = strlen(objtool_cmds[i].name); + } + + puts(" Commands:"); + for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { + printf(" %-*s ", longest, objtool_cmds[i].name); + puts(objtool_cmds[i].help); + } + + printf("\n"); + + exit(1); +} + +static void handle_options(int *argc, const char ***argv) +{ + while (*argc > 0) { + const char *cmd = (*argv)[0]; + + if (cmd[0] != '-') + break; + + if (!strcmp(cmd, "--help") || !strcmp(cmd, "-h")) { + help = true; + break; + } else { + fprintf(stderr, "Unknown option: %s\n", cmd); + fprintf(stderr, "\n Usage: %s\n", + objtool_usage_string); + exit(1); + } + + (*argv)++; + (*argc)--; + } +} + +static void handle_internal_command(int argc, const char **argv) +{ + const char *cmd = argv[0]; + unsigned int i, ret; + + for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { + struct cmd_struct *p = objtool_cmds+i; + + if (strcmp(p->name, cmd)) + continue; + + ret = p->fn(argc, argv); + + exit(ret); + } + + cmd_usage(); +} + +int main(int argc, const char **argv) +{ + static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED"; + + /* libsubcmd init */ + exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED); + pager_init(UNUSED); + + argv++; + argc--; + handle_options(&argc, &argv); + + if (!argc || help) + cmd_usage(); + + handle_internal_command(argc, argv); + + return 0; +} diff --git a/tools/objtool/special.c b/tools/objtool/special.c new file mode 100644 index 000000000000..bff8abb3a4aa --- /dev/null +++ b/tools/objtool/special.c @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This file reads all the special sections which have alternate instructions + * which can be patched in or redirected to at runtime. + */ + +#include <stdlib.h> +#include <string.h> + +#include "special.h" +#include "warn.h" + +#define EX_ENTRY_SIZE 12 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 4 + +#define JUMP_ENTRY_SIZE 24 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 8 + +#define ALT_ENTRY_SIZE 13 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#define X86_FEATURE_POPCNT (4*32+23) + +struct special_entry { + const char *sec; + bool group, jump_or_nop; + unsigned char size, orig, new; + unsigned char orig_len, new_len; /* group only */ + unsigned char feature; /* ALTERNATIVE macro CPU feature */ +}; + +struct special_entry entries[] = { + { + .sec = ".altinstructions", + .group = true, + .size = ALT_ENTRY_SIZE, + .orig = ALT_ORIG_OFFSET, + .orig_len = ALT_ORIG_LEN_OFFSET, + .new = ALT_NEW_OFFSET, + .new_len = ALT_NEW_LEN_OFFSET, + .feature = ALT_FEATURE_OFFSET, + }, + { + .sec = "__jump_table", + .jump_or_nop = true, + .size = JUMP_ENTRY_SIZE, + .orig = JUMP_ORIG_OFFSET, + .new = JUMP_NEW_OFFSET, + }, + { + .sec = "__ex_table", + .size = EX_ENTRY_SIZE, + .orig = EX_ORIG_OFFSET, + .new = EX_NEW_OFFSET, + }, + {}, +}; + +static int get_alt_entry(struct elf *elf, struct special_entry *entry, + struct section *sec, int idx, + struct special_alt *alt) +{ + struct rela *orig_rela, *new_rela; + unsigned long offset; + + offset = idx * entry->size; + + alt->group = entry->group; + alt->jump_or_nop = entry->jump_or_nop; + + if (alt->group) { + alt->orig_len = *(unsigned char *)(sec->data + offset + + entry->orig_len); + alt->new_len = *(unsigned char *)(sec->data + offset + + entry->new_len); + } + + if (entry->feature) { + unsigned short feature; + + feature = *(unsigned short *)(sec->data + offset + + entry->feature); + + /* + * It has been requested that we don't validate the !POPCNT + * feature path which is a "very very small percentage of + * machines". + */ + if (feature == X86_FEATURE_POPCNT) + alt->skip_orig = true; + } + + orig_rela = find_rela_by_dest(sec, offset + entry->orig); + if (!orig_rela) { + WARN_FUNC("can't find orig rela", sec, offset + entry->orig); + return -1; + } + if (orig_rela->sym->type != STT_SECTION) { + WARN_FUNC("don't know how to handle non-section rela symbol %s", + sec, offset + entry->orig, orig_rela->sym->name); + return -1; + } + + alt->orig_sec = orig_rela->sym->sec; + alt->orig_off = orig_rela->addend; + + if (!entry->group || alt->new_len) { + new_rela = find_rela_by_dest(sec, offset + entry->new); + if (!new_rela) { + WARN_FUNC("can't find new rela", + sec, offset + entry->new); + return -1; + } + + alt->new_sec = new_rela->sym->sec; + alt->new_off = (unsigned int)new_rela->addend; + + /* _ASM_EXTABLE_EX hack */ + if (alt->new_off >= 0x7ffffff0) + alt->new_off -= 0x7ffffff0; + } + + return 0; +} + +/* + * Read all the special sections and create a list of special_alt structs which + * describe all the alternate instructions which can be patched in or + * redirected to at runtime. + */ +int special_get_alts(struct elf *elf, struct list_head *alts) +{ + struct special_entry *entry; + struct section *sec; + unsigned int nr_entries; + struct special_alt *alt; + int idx, ret; + + INIT_LIST_HEAD(alts); + + for (entry = entries; entry->sec; entry++) { + sec = find_section_by_name(elf, entry->sec); + if (!sec) + continue; + + if (sec->len % entry->size != 0) { + WARN("%s size not a multiple of %d", + sec->name, entry->size); + return -1; + } + + nr_entries = sec->len / entry->size; + + for (idx = 0; idx < nr_entries; idx++) { + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + return -1; + } + memset(alt, 0, sizeof(*alt)); + + ret = get_alt_entry(elf, entry, sec, idx, alt); + if (ret) + return ret; + + list_add_tail(&alt->list, alts); + } + } + + return 0; +} diff --git a/tools/objtool/special.h b/tools/objtool/special.h new file mode 100644 index 000000000000..fad1d092f679 --- /dev/null +++ b/tools/objtool/special.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _SPECIAL_H +#define _SPECIAL_H + +#include <stdbool.h> +#include "elf.h" + +struct special_alt { + struct list_head list; + + bool group; + bool skip_orig; + bool jump_or_nop; + + struct section *orig_sec; + unsigned long orig_off; + + struct section *new_sec; + unsigned long new_off; + + unsigned int orig_len, new_len; /* group only */ +}; + +int special_get_alts(struct elf *elf, struct list_head *alts); + +#endif /* _SPECIAL_H */ diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h new file mode 100644 index 000000000000..ac7e07523e84 --- /dev/null +++ b/tools/objtool/warn.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _WARN_H +#define _WARN_H + +extern const char *objname; + +static inline char *offstr(struct section *sec, unsigned long offset) +{ + struct symbol *func; + char *name, *str; + unsigned long name_off; + + func = find_containing_func(sec, offset); + if (func) { + name = func->name; + name_off = offset - func->offset; + } else { + name = sec->name; + name_off = offset; + } + + str = malloc(strlen(name) + 20); + + if (func) + sprintf(str, "%s()+0x%lx", name, name_off); + else + sprintf(str, "%s+0x%lx", name, name_off); + + return str; +} + +#define WARN(format, ...) \ + fprintf(stderr, \ + "%s: warning: objtool: " format "\n", \ + objname, ##__VA_ARGS__) + +#define WARN_FUNC(format, sec, offset, ...) \ +({ \ + char *_str = offstr(sec, offset); \ + WARN("%s: " format, _str, ##__VA_ARGS__); \ + free(_str); \ +}) + +#endif /* _WARN_H */ diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 3ba1c0b09908..098cfb9ca8f0 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../config/utilities.mak +include ../../scripts/utilities.mak MAN1_TXT= \ $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index be764f9ec769..c6c8318e38a2 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -672,6 +672,7 @@ The letters are: d create a debug log g synthesize a call chain (use with i or x) l synthesize last branch entries (use with i or x) + s skip initial number of events "Instructions" events look like they were recorded by "perf record -e instructions". @@ -730,6 +731,12 @@ from one sample to the next. To disable trace decoding entirely, use the option --no-itrace. +It is also possible to skip events generated (instructions, branches, transactions) +at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + +skips the first million instructions. dump option ----------- diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 65453f4c7006..e2a4c5e0dbe5 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -7,6 +7,7 @@ d create a debug log g synthesize a call chain (use with i or x) l synthesize last branch entries (use with i or x) + s skip initial number of events The default is all events i.e. the same as --itrace=ibxe @@ -24,3 +25,10 @@ Also the number of last branch entries (default 64, max. 1024) for instructions or transactions events can be specified. + + It is also possible to skip events generated (instructions, branches, transactions) + at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + + skips the first million instructions. diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index e9cd39a92dc2..778f54d4d0bd 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -33,7 +33,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. -v:: --verbose:: diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index d1deb573877f..3e9490b9c533 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -75,7 +75,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. --symfs=<directory>:: Look for files with symbols relative to this directory. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 79483f40e991..a126e97a8114 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -40,10 +40,12 @@ address should be. The 'p' modifier can be specified multiple times: 0 - SAMPLE_IP can have arbitrary skid 1 - SAMPLE_IP must have constant skid 2 - SAMPLE_IP requested to have 0 skid - 3 - SAMPLE_IP must have 0 skid + 3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid + sample shadowing effects. For Intel systems precise event sampling is implemented with PEBS -which supports up to precise-level 2. +which supports up to precise-level 2, and precise level 3 for +some special cases On AMD systems it is implemented using IBS (up to precise-level 2). The precise modifier works with event types 0x76 (cpu-cycles, CPU @@ -91,6 +93,67 @@ raw encoding of 0x1A8 can be used: You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. +ARBITRARY PMUS +-------------- + +perf also supports an extended syntax for specifying raw parameters +to PMUs. Using this typically requires looking up the specific event +in the CPU vendor specific documentation. + +The available PMUs and their raw parameters can be listed with + + ls /sys/devices/*/format + +For example the raw event "LSD.UOPS" core pmu event above could +be specified as + + perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ... + +PER SOCKET PMUS +--------------- + +Some PMUs are not associated with a core, but with a whole CPU socket. +Events on these PMUs generally cannot be sampled, but only counted globally +with perf stat -a. They can be bound to one logical CPU, but will measure +all the CPUs in the same socket. + +This example measures memory bandwidth every second +on the first memory controller on socket 0 of a Intel Xeon system + + perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ... + +Each memory controller has its own PMU. Measuring the complete system +bandwidth would require specifying all imc PMUs (see perf list output), +and adding the values together. + +This example measures the combined core power every second + + perf stat -I 1000 -e power/energy-cores/ -a + +ACCESS RESTRICTIONS +------------------- + +For non root users generally only context switched PMU events are available. +This is normally only the events in the cpu PMU, the predefined events +like cycles and instructions and some software events. + +Other PMUs and global measurements are normally root only. +Some event qualifiers, such as "any", are also root only. + +This can be overriden by setting the kernel.perf_event_paranoid +sysctl to -1, which allows non root to use these events. + +For accessing trace point events perf needs to have read access to +/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed +setting. + +TRACING +------- + +Some PMUs control advanced hardware tracing capabilities, such as Intel PT, +that allows low overhead execution tracing. These are described in a separate +intel-pt.txt document. + PARAMETERIZED EVENTS -------------------- @@ -104,6 +167,50 @@ also be supplied. For example: perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... +EVENT GROUPS +------------ + +Perf supports time based multiplexing of events, when the number of events +active exceeds the number of hardware performance counters. Multiplexing +can cause measurement errors when the workload changes its execution +profile. + +When metrics are computed using formulas from event counts, it is useful to +ensure some events are always measured together as a group to minimize multiplexing +errors. Event groups can be specified using { }. + + perf stat -e '{instructions,cycles}' ... + +The number of available performance counters depend on the CPU. A group +cannot contain more events than available counters. +For example Intel Core CPUs typically have four generic performance counters +for the core, plus three fixed counters for instructions, cycles and +ref-cycles. Some special events have restrictions on which counter they +can schedule, and may not support multiple instances in a single group. +When too many events are specified in the group none of them will not +be measured. + +Globally pinned events can limit the number of counters available for +other groups. On x86 systems, the NMI watchdog pins a counter by default. +The nmi watchdog can be disabled as root with + + echo 0 > /proc/sys/kernel/nmi_watchdog + +Events from multiple different PMUs cannot be mixed in a group, with +some exceptions for software events. + +LEADER SAMPLING +--------------- + +perf also supports group leader sampling using the :S specifier. + + perf record -e '{cycles,instructions}:S' ... + perf report --group + +Normally all events in a event group sample, but with :S only +the first event (the leader) samples, and it only reads the values of the +other events in the group. + OPTIONS ------- @@ -141,5 +248,5 @@ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], -http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], +http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 43310d8661fe..1d6092c460dd 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -48,6 +48,14 @@ OPTIONS option can be passed in record mode. It will be interpreted the same way as perf record. +-K:: +--all-kernel:: + Configure all used events to run in kernel space. + +-U:: +--all-user:: + Configure all used events to run in user space. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 19aa17532a16..8dbee832abd9 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -347,6 +347,19 @@ Configure all used events to run in kernel space. --all-user:: Configure all used events to run in user space. +--timestamp-filename +Append timestamp to output file name. + +--switch-output:: +Generate multiple perf.data files, timestamp prefixed, switching to a new one +when receiving a SIGUSR2. + +A possible use case is to, given an external event, slice the perf.data file +that gets then processed, possibly via a perf script, to decide if that +particular perf.data snapshot should be kept or not. + +Implies --timestamp-filename, --no-buildid and --no-buildid-cache. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 12113992ac9d..ebaf849e30ef 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -248,7 +248,7 @@ OPTIONS Note that when using the --itrace option the synthesized callchain size will override this value if the synthesized callchain size is bigger. - Default: 127 + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. -G:: --inverted:: @@ -285,7 +285,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. --symfs=<directory>:: Look for files with symbols relative to this directory. diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 8ff4df956951..1cc08cc47ac5 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -50,6 +50,22 @@ OPTIONS --dump-raw-trace=:: Display verbose dump of the sched data. +OPTIONS for 'perf sched map' +---------------------------- + +--compact:: + Show only CPUs with activity. Helps visualizing on high core + count systems. + +--cpus:: + Show just entries with activities for the given CPUs. + +--color-cpus:: + Highlight the given cpus. + +--color-pids:: + Highlight the given pids. + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 382ddfb45d1d..a856a1095893 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -259,9 +259,23 @@ include::itrace.txt[] --full-source-path:: Show the full path for source files for srcline output. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + Note that when using the --itrace option the synthesized callchain size + will override this value if the synthesized callchain size is bigger. + + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. + --ns:: Use 9 decimal places when displaying time (i.e. show the nanoseconds) +-f:: +--force:: + Don't do ownership validation. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 19f046f027cd..91d638df3a6b 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -177,7 +177,7 @@ Default is to monitor all CPUS. between information loss and faster processing especially for workloads that can have a very long callchain stack. - Default: 127 + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. --ignore-callees=<regex>:: Ignore callees of the function(s) matching the given regex. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 13293de8869f..6afe20121bc0 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -117,9 +117,41 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --syscalls:: Trace system calls. This options is enabled by default. +--call-graph [mode,type,min[,limit],order[,key][,branch]]:: + Setup and enable call-graph (stack chain/backtrace) recording. + See `--call-graph` section in perf-record and perf-report + man pages for details. The ones that are most useful in 'perf trace' + are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'. + + Using this will, for the root user, bump the value of --mmap-pages to 4 + times the maximum for non-root users, based on the kernel.perf_event_mlock_kb + sysctl. This is done only if the user doesn't specify a --mmap-pages value. + +--kernel-syscall-graph:: + Show the kernel callchains on the syscall exit path. + --event:: Trace other events, see 'perf list' for a complete list. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. Note that at this point + this is just about the presentation part, i.e. the kernel is still + not limiting, the overhead of callchains needs to be set via the + knobs in --call-graph dwarf. + + Implies '--call-graph dwarf' when --call-graph not present on the + command line, on systems where DWARF unwinding was built in. + + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. + +--min-stack:: + Set the stack depth limit when parsing the callchain, anything + below the specified depth will be ignored. Disabled by default. + + Implies '--call-graph dwarf' when --call-graph not present on the + command line, on systems where DWARF unwinding was built in. + --proc-map-timeout:: When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 2e1fa2357528..8c8c6b9ce915 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -74,6 +74,7 @@ arch/*/include/uapi/asm/unistd*.h arch/*/include/uapi/asm/perf_regs.h arch/*/lib/memcpy*.S arch/*/lib/memset*.S +arch/*/include/asm/*features.h include/linux/poison.h include/linux/hw_breakpoint.h include/uapi/linux/perf_event.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4a4fad4182f5..bde8cbae7dd9 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -3,7 +3,7 @@ include ../scripts/Makefile.include # The default target of this Makefile is... all: -include config/utilities.mak +include ../scripts/utilities.mak # Define V to have a more verbose compile. # @@ -183,6 +183,11 @@ endif include config/Makefile endif +ifeq ($(config),0) +include $(srctree)/tools/scripts/Makefile.arch +-include arch/$(ARCH)/Makefile +endif + # The FEATURE_DUMP_EXPORT holds location of the actual # FEATURE_DUMP file to be used to bypass feature detection # (for bpf or any other subproject) @@ -297,8 +302,6 @@ endif # because maintaining the nesting to match is a pain. If # we had "elif" things would have been much nicer... --include arch/$(ARCH)/Makefile - ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif @@ -390,7 +393,7 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -430,7 +433,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o -$(LIBPERF_IN): fixdep FORCE +$(LIBPERF_IN): prepare fixdep FORCE $(Q)$(MAKE) $(build)=libperf $(LIB_FILE): $(LIBPERF_IN) @@ -625,7 +628,7 @@ config-clean: $(call QUIET_CLEAN, config) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected @@ -662,5 +665,5 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare -.PHONY: libtraceevent_plugins +.PHONY: libtraceevent_plugins archheaders diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 56e05f126ad8..cc3930904d68 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -3,4 +3,5 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h new file mode 100644 index 000000000000..75de0e92e71e --- /dev/null +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -0,0 +1,69 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include <linux/types.h> +#include <asm/perf_regs.h> + +#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_POWERPC_MAX +#ifdef __powerpc64__ + #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 +#else + #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 +#endif + +#define PERF_REG_IP PERF_REG_POWERPC_NIP +#define PERF_REG_SP PERF_REG_POWERPC_R1 + +static const char *reg_names[] = { + [PERF_REG_POWERPC_R0] = "r0", + [PERF_REG_POWERPC_R1] = "r1", + [PERF_REG_POWERPC_R2] = "r2", + [PERF_REG_POWERPC_R3] = "r3", + [PERF_REG_POWERPC_R4] = "r4", + [PERF_REG_POWERPC_R5] = "r5", + [PERF_REG_POWERPC_R6] = "r6", + [PERF_REG_POWERPC_R7] = "r7", + [PERF_REG_POWERPC_R8] = "r8", + [PERF_REG_POWERPC_R9] = "r9", + [PERF_REG_POWERPC_R10] = "r10", + [PERF_REG_POWERPC_R11] = "r11", + [PERF_REG_POWERPC_R12] = "r12", + [PERF_REG_POWERPC_R13] = "r13", + [PERF_REG_POWERPC_R14] = "r14", + [PERF_REG_POWERPC_R15] = "r15", + [PERF_REG_POWERPC_R16] = "r16", + [PERF_REG_POWERPC_R17] = "r17", + [PERF_REG_POWERPC_R18] = "r18", + [PERF_REG_POWERPC_R19] = "r19", + [PERF_REG_POWERPC_R20] = "r20", + [PERF_REG_POWERPC_R21] = "r21", + [PERF_REG_POWERPC_R22] = "r22", + [PERF_REG_POWERPC_R23] = "r23", + [PERF_REG_POWERPC_R24] = "r24", + [PERF_REG_POWERPC_R25] = "r25", + [PERF_REG_POWERPC_R26] = "r26", + [PERF_REG_POWERPC_R27] = "r27", + [PERF_REG_POWERPC_R28] = "r28", + [PERF_REG_POWERPC_R29] = "r29", + [PERF_REG_POWERPC_R30] = "r30", + [PERF_REG_POWERPC_R31] = "r31", + [PERF_REG_POWERPC_NIP] = "nip", + [PERF_REG_POWERPC_MSR] = "msr", + [PERF_REG_POWERPC_ORIG_R3] = "orig_r3", + [PERF_REG_POWERPC_CTR] = "ctr", + [PERF_REG_POWERPC_LINK] = "link", + [PERF_REG_POWERPC_XER] = "xer", + [PERF_REG_POWERPC_CCR] = "ccr", + [PERF_REG_POWERPC_SOFTE] = "softe", + [PERF_REG_POWERPC_TRAP] = "trap", + [PERF_REG_POWERPC_DAR] = "dar", + [PERF_REG_POWERPC_DSISR] = "dsisr" +}; + +static inline const char *perf_reg_name(int id) +{ + return reg_names[id]; +} +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index c8fe2074d217..90ad64b231cd 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,6 +1,8 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-y += kvm-stat.o +libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 733151cdf46e..41bdf9530d82 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -10,19 +10,26 @@ */ #include <stddef.h> +#include <errno.h> +#include <string.h> #include <dwarf-regs.h> - +#include <linux/ptrace.h> +#include <linux/kernel.h> +#include "util.h" struct pt_regs_dwarfnum { const char *name; unsigned int dwarfnum; + unsigned int ptregs_offset; }; -#define STR(s) #s -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%gpr##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} +#define REG_DWARFNUM_NAME(r, num) \ + {.name = STR(%)STR(r), .dwarfnum = num, \ + .ptregs_offset = offsetof(struct pt_regs, r)} +#define GPR_DWARFNUM_NAME(num) \ + {.name = STR(%gpr##num), .dwarfnum = num, \ + .ptregs_offset = offsetof(struct pt_regs, gpr[num])} +#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} /* * Reference: @@ -61,12 +68,12 @@ static const struct pt_regs_dwarfnum regdwarfnum_table[] = { GPR_DWARFNUM_NAME(29), GPR_DWARFNUM_NAME(30), GPR_DWARFNUM_NAME(31), - REG_DWARFNUM_NAME("%msr", 66), - REG_DWARFNUM_NAME("%ctr", 109), - REG_DWARFNUM_NAME("%link", 108), - REG_DWARFNUM_NAME("%xer", 101), - REG_DWARFNUM_NAME("%dar", 119), - REG_DWARFNUM_NAME("%dsisr", 118), + REG_DWARFNUM_NAME(msr, 66), + REG_DWARFNUM_NAME(ctr, 109), + REG_DWARFNUM_NAME(link, 108), + REG_DWARFNUM_NAME(xer, 101), + REG_DWARFNUM_NAME(dar, 119), + REG_DWARFNUM_NAME(dsisr, 118), REG_DWARFNUM_END, }; @@ -86,3 +93,12 @@ const char *get_arch_regstr(unsigned int n) return roff->name; return NULL; } + +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_dwarfnum *roff; + for (roff = regdwarfnum_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->ptregs_offset; + return -EINVAL; +} diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6c1b8a75db09..f8ccee132867 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -3,9 +3,9 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> - -#include "../../util/header.h" -#include "../../util/util.h" +#include <linux/stringify.h> +#include "header.h" +#include "util.h" #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c new file mode 100644 index 000000000000..a3c3e1ce6807 --- /dev/null +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -0,0 +1,49 @@ +#include "../../perf.h" +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(r0, PERF_REG_POWERPC_R0), + SMPL_REG(r1, PERF_REG_POWERPC_R1), + SMPL_REG(r2, PERF_REG_POWERPC_R2), + SMPL_REG(r3, PERF_REG_POWERPC_R3), + SMPL_REG(r4, PERF_REG_POWERPC_R4), + SMPL_REG(r5, PERF_REG_POWERPC_R5), + SMPL_REG(r6, PERF_REG_POWERPC_R6), + SMPL_REG(r7, PERF_REG_POWERPC_R7), + SMPL_REG(r8, PERF_REG_POWERPC_R8), + SMPL_REG(r9, PERF_REG_POWERPC_R9), + SMPL_REG(r10, PERF_REG_POWERPC_R10), + SMPL_REG(r11, PERF_REG_POWERPC_R11), + SMPL_REG(r12, PERF_REG_POWERPC_R12), + SMPL_REG(r13, PERF_REG_POWERPC_R13), + SMPL_REG(r14, PERF_REG_POWERPC_R14), + SMPL_REG(r15, PERF_REG_POWERPC_R15), + SMPL_REG(r16, PERF_REG_POWERPC_R16), + SMPL_REG(r17, PERF_REG_POWERPC_R17), + SMPL_REG(r18, PERF_REG_POWERPC_R18), + SMPL_REG(r19, PERF_REG_POWERPC_R19), + SMPL_REG(r20, PERF_REG_POWERPC_R20), + SMPL_REG(r21, PERF_REG_POWERPC_R21), + SMPL_REG(r22, PERF_REG_POWERPC_R22), + SMPL_REG(r23, PERF_REG_POWERPC_R23), + SMPL_REG(r24, PERF_REG_POWERPC_R24), + SMPL_REG(r25, PERF_REG_POWERPC_R25), + SMPL_REG(r26, PERF_REG_POWERPC_R26), + SMPL_REG(r27, PERF_REG_POWERPC_R27), + SMPL_REG(r28, PERF_REG_POWERPC_R28), + SMPL_REG(r29, PERF_REG_POWERPC_R29), + SMPL_REG(r30, PERF_REG_POWERPC_R30), + SMPL_REG(r31, PERF_REG_POWERPC_R31), + SMPL_REG(nip, PERF_REG_POWERPC_NIP), + SMPL_REG(msr, PERF_REG_POWERPC_MSR), + SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3), + SMPL_REG(ctr, PERF_REG_POWERPC_CTR), + SMPL_REG(link, PERF_REG_POWERPC_LINK), + SMPL_REG(xer, PERF_REG_POWERPC_XER), + SMPL_REG(ccr, PERF_REG_POWERPC_CCR), + SMPL_REG(softe, PERF_REG_POWERPC_SOFTE), + SMPL_REG(trap, PERF_REG_POWERPC_TRAP), + SMPL_REG(dar, PERF_REG_POWERPC_DAR), + SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), + SMPL_REG_END +}; diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index bbc1a50768dd..c6d0f91731a1 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -19,12 +19,6 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) ehdr.e_type == ET_DYN; } -#if defined(_CALL_ELF) && _CALL_ELF == 2 -void arch__elf_sym_adjust(GElf_Sym *sym) -{ - sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); -} -#endif #endif #if !defined(_CALL_ELF) || _CALL_ELF != 2 @@ -65,18 +59,45 @@ bool arch__prefers_symtab(void) return true; } +#ifdef HAVE_LIBELF_SUPPORT +void arch__sym_update(struct symbol *s, GElf_Sym *sym) +{ + s->arch_sym = sym->st_other; +} +#endif + #define PPC64LE_LEP_OFFSET 8 void arch__fix_tev_from_maps(struct perf_probe_event *pev, - struct probe_trace_event *tev, struct map *map) + struct probe_trace_event *tev, struct map *map, + struct symbol *sym) { + int lep_offset; + /* - * ppc64 ABIv2 local entry point is currently always 2 instructions - * (8 bytes) after the global entry point. + * When probing at a function entry point, we normally always want the + * LEP since that catches calls to the function through both the GEP and + * the LEP. Hence, we would like to probe at an offset of 8 bytes if + * the user only specified the function entry. + * + * However, if the user specifies an offset, we fall back to using the + * GEP since all userspace applications (objdump/readelf) show function + * disassembly with offsets from the GEP. + * + * In addition, we shouldn't specify an offset for kretprobes. */ - if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { - tev->point.address += PPC64LE_LEP_OFFSET; + if (pev->point.offset || pev->point.retprobe || !map || !sym) + return; + + lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); + + if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) tev->point.offset += PPC64LE_LEP_OFFSET; + else if (lep_offset) { + if (pev->uprobes) + tev->point.address += lep_offset; + else + tev->point.offset += lep_offset; } } #endif diff --git a/tools/perf/arch/powerpc/util/unwind-libunwind.c b/tools/perf/arch/powerpc/util/unwind-libunwind.c new file mode 100644 index 000000000000..9e15f92ae49f --- /dev/null +++ b/tools/perf/arch/powerpc/util/unwind-libunwind.c @@ -0,0 +1,96 @@ +/* + * Copyright 2016 Chandan Kumar, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <errno.h> +#include <libunwind.h> +#include <asm/perf_regs.h> +#include "../../util/unwind.h" +#include "../../util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_PPC64_R0: + return PERF_REG_POWERPC_R0; + case UNW_PPC64_R1: + return PERF_REG_POWERPC_R1; + case UNW_PPC64_R2: + return PERF_REG_POWERPC_R2; + case UNW_PPC64_R3: + return PERF_REG_POWERPC_R3; + case UNW_PPC64_R4: + return PERF_REG_POWERPC_R4; + case UNW_PPC64_R5: + return PERF_REG_POWERPC_R5; + case UNW_PPC64_R6: + return PERF_REG_POWERPC_R6; + case UNW_PPC64_R7: + return PERF_REG_POWERPC_R7; + case UNW_PPC64_R8: + return PERF_REG_POWERPC_R8; + case UNW_PPC64_R9: + return PERF_REG_POWERPC_R9; + case UNW_PPC64_R10: + return PERF_REG_POWERPC_R10; + case UNW_PPC64_R11: + return PERF_REG_POWERPC_R11; + case UNW_PPC64_R12: + return PERF_REG_POWERPC_R12; + case UNW_PPC64_R13: + return PERF_REG_POWERPC_R13; + case UNW_PPC64_R14: + return PERF_REG_POWERPC_R14; + case UNW_PPC64_R15: + return PERF_REG_POWERPC_R15; + case UNW_PPC64_R16: + return PERF_REG_POWERPC_R16; + case UNW_PPC64_R17: + return PERF_REG_POWERPC_R17; + case UNW_PPC64_R18: + return PERF_REG_POWERPC_R18; + case UNW_PPC64_R19: + return PERF_REG_POWERPC_R19; + case UNW_PPC64_R20: + return PERF_REG_POWERPC_R20; + case UNW_PPC64_R21: + return PERF_REG_POWERPC_R21; + case UNW_PPC64_R22: + return PERF_REG_POWERPC_R22; + case UNW_PPC64_R23: + return PERF_REG_POWERPC_R23; + case UNW_PPC64_R24: + return PERF_REG_POWERPC_R24; + case UNW_PPC64_R25: + return PERF_REG_POWERPC_R25; + case UNW_PPC64_R26: + return PERF_REG_POWERPC_R26; + case UNW_PPC64_R27: + return PERF_REG_POWERPC_R27; + case UNW_PPC64_R28: + return PERF_REG_POWERPC_R28; + case UNW_PPC64_R29: + return PERF_REG_POWERPC_R29; + case UNW_PPC64_R30: + return PERF_REG_POWERPC_R30; + case UNW_PPC64_R31: + return PERF_REG_POWERPC_R31; + case UNW_PPC64_LR: + return PERF_REG_POWERPC_LINK; + case UNW_PPC64_CTR: + return PERF_REG_POWERPC_CTR; + case UNW_PPC64_XER: + return PERF_REG_POWERPC_XER; + case UNW_PPC64_NIP: + return PERF_REG_POWERPC_NIP; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + return -EINVAL; +} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 269af2143735..6c9211b18ec0 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -4,3 +4,26 @@ endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 + +### +# Syscall table generation +# + +out := $(OUTPUT)arch/x86/include/generated/asm +header := $(out)/syscalls_64.c +sys := $(srctree)/tools/perf/arch/x86/entry/syscalls +systbl := $(sys)/syscalltbl.sh + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sys)/syscall_64.tbl $(systbl) + @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \ + || echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true + $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ + +clean:: + $(call QUIET_CLEAN, x86) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl new file mode 100644 index 000000000000..cac6d17ce5db --- /dev/null +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -0,0 +1,376 @@ +# +# 64-bit system call numbers and entry vectors +# +# The format is: +# <number> <abi> <name> <entry point> +# +# The abi is "common", "64" or "x32" for this file. +# +0 common read sys_read +1 common write sys_write +2 common open sys_open +3 common close sys_close +4 common stat sys_newstat +5 common fstat sys_newfstat +6 common lstat sys_newlstat +7 common poll sys_poll +8 common lseek sys_lseek +9 common mmap sys_mmap +10 common mprotect sys_mprotect +11 common munmap sys_munmap +12 common brk sys_brk +13 64 rt_sigaction sys_rt_sigaction +14 common rt_sigprocmask sys_rt_sigprocmask +15 64 rt_sigreturn sys_rt_sigreturn/ptregs +16 64 ioctl sys_ioctl +17 common pread64 sys_pread64 +18 common pwrite64 sys_pwrite64 +19 64 readv sys_readv +20 64 writev sys_writev +21 common access sys_access +22 common pipe sys_pipe +23 common select sys_select +24 common sched_yield sys_sched_yield +25 common mremap sys_mremap +26 common msync sys_msync +27 common mincore sys_mincore +28 common madvise sys_madvise +29 common shmget sys_shmget +30 common shmat sys_shmat +31 common shmctl sys_shmctl +32 common dup sys_dup +33 common dup2 sys_dup2 +34 common pause sys_pause +35 common nanosleep sys_nanosleep +36 common getitimer sys_getitimer +37 common alarm sys_alarm +38 common setitimer sys_setitimer +39 common getpid sys_getpid +40 common sendfile sys_sendfile64 +41 common socket sys_socket +42 common connect sys_connect +43 common accept sys_accept +44 common sendto sys_sendto +45 64 recvfrom sys_recvfrom +46 64 sendmsg sys_sendmsg +47 64 recvmsg sys_recvmsg +48 common shutdown sys_shutdown +49 common bind sys_bind +50 common listen sys_listen +51 common getsockname sys_getsockname +52 common getpeername sys_getpeername +53 common socketpair sys_socketpair +54 64 setsockopt sys_setsockopt +55 64 getsockopt sys_getsockopt +56 common clone sys_clone/ptregs +57 common fork sys_fork/ptregs +58 common vfork sys_vfork/ptregs +59 64 execve sys_execve/ptregs +60 common exit sys_exit +61 common wait4 sys_wait4 +62 common kill sys_kill +63 common uname sys_newuname +64 common semget sys_semget +65 common semop sys_semop +66 common semctl sys_semctl +67 common shmdt sys_shmdt +68 common msgget sys_msgget +69 common msgsnd sys_msgsnd +70 common msgrcv sys_msgrcv +71 common msgctl sys_msgctl +72 common fcntl sys_fcntl +73 common flock sys_flock +74 common fsync sys_fsync +75 common fdatasync sys_fdatasync +76 common truncate sys_truncate +77 common ftruncate sys_ftruncate +78 common getdents sys_getdents +79 common getcwd sys_getcwd +80 common chdir sys_chdir +81 common fchdir sys_fchdir +82 common rename sys_rename +83 common mkdir sys_mkdir +84 common rmdir sys_rmdir +85 common creat sys_creat +86 common link sys_link +87 common unlink sys_unlink +88 common symlink sys_symlink +89 common readlink sys_readlink +90 common chmod sys_chmod +91 common fchmod sys_fchmod +92 common chown sys_chown +93 common fchown sys_fchown +94 common lchown sys_lchown +95 common umask sys_umask +96 common gettimeofday sys_gettimeofday +97 common getrlimit sys_getrlimit +98 common getrusage sys_getrusage +99 common sysinfo sys_sysinfo +100 common times sys_times +101 64 ptrace sys_ptrace +102 common getuid sys_getuid +103 common syslog sys_syslog +104 common getgid sys_getgid +105 common setuid sys_setuid +106 common setgid sys_setgid +107 common geteuid sys_geteuid +108 common getegid sys_getegid +109 common setpgid sys_setpgid +110 common getppid sys_getppid +111 common getpgrp sys_getpgrp +112 common setsid sys_setsid +113 common setreuid sys_setreuid +114 common setregid sys_setregid +115 common getgroups sys_getgroups +116 common setgroups sys_setgroups +117 common setresuid sys_setresuid +118 common getresuid sys_getresuid +119 common setresgid sys_setresgid +120 common getresgid sys_getresgid +121 common getpgid sys_getpgid +122 common setfsuid sys_setfsuid +123 common setfsgid sys_setfsgid +124 common getsid sys_getsid +125 common capget sys_capget +126 common capset sys_capset +127 64 rt_sigpending sys_rt_sigpending +128 64 rt_sigtimedwait sys_rt_sigtimedwait +129 64 rt_sigqueueinfo sys_rt_sigqueueinfo +130 common rt_sigsuspend sys_rt_sigsuspend +131 64 sigaltstack sys_sigaltstack +132 common utime sys_utime +133 common mknod sys_mknod +134 64 uselib +135 common personality sys_personality +136 common ustat sys_ustat +137 common statfs sys_statfs +138 common fstatfs sys_fstatfs +139 common sysfs sys_sysfs +140 common getpriority sys_getpriority +141 common setpriority sys_setpriority +142 common sched_setparam sys_sched_setparam +143 common sched_getparam sys_sched_getparam +144 common sched_setscheduler sys_sched_setscheduler +145 common sched_getscheduler sys_sched_getscheduler +146 common sched_get_priority_max sys_sched_get_priority_max +147 common sched_get_priority_min sys_sched_get_priority_min +148 common sched_rr_get_interval sys_sched_rr_get_interval +149 common mlock sys_mlock +150 common munlock sys_munlock +151 common mlockall sys_mlockall +152 common munlockall sys_munlockall +153 common vhangup sys_vhangup +154 common modify_ldt sys_modify_ldt +155 common pivot_root sys_pivot_root +156 64 _sysctl sys_sysctl +157 common prctl sys_prctl +158 common arch_prctl sys_arch_prctl +159 common adjtimex sys_adjtimex +160 common setrlimit sys_setrlimit +161 common chroot sys_chroot +162 common sync sys_sync +163 common acct sys_acct +164 common settimeofday sys_settimeofday +165 common mount sys_mount +166 common umount2 sys_umount +167 common swapon sys_swapon +168 common swapoff sys_swapoff +169 common reboot sys_reboot +170 common sethostname sys_sethostname +171 common setdomainname sys_setdomainname +172 common iopl sys_iopl/ptregs +173 common ioperm sys_ioperm +174 64 create_module +175 common init_module sys_init_module +176 common delete_module sys_delete_module +177 64 get_kernel_syms +178 64 query_module +179 common quotactl sys_quotactl +180 64 nfsservctl +181 common getpmsg +182 common putpmsg +183 common afs_syscall +184 common tuxcall +185 common security +186 common gettid sys_gettid +187 common readahead sys_readahead +188 common setxattr sys_setxattr +189 common lsetxattr sys_lsetxattr +190 common fsetxattr sys_fsetxattr +191 common getxattr sys_getxattr +192 common lgetxattr sys_lgetxattr +193 common fgetxattr sys_fgetxattr +194 common listxattr sys_listxattr +195 common llistxattr sys_llistxattr +196 common flistxattr sys_flistxattr +197 common removexattr sys_removexattr +198 common lremovexattr sys_lremovexattr +199 common fremovexattr sys_fremovexattr +200 common tkill sys_tkill +201 common time sys_time +202 common futex sys_futex +203 common sched_setaffinity sys_sched_setaffinity +204 common sched_getaffinity sys_sched_getaffinity +205 64 set_thread_area +206 64 io_setup sys_io_setup +207 common io_destroy sys_io_destroy +208 common io_getevents sys_io_getevents +209 64 io_submit sys_io_submit +210 common io_cancel sys_io_cancel +211 64 get_thread_area +212 common lookup_dcookie sys_lookup_dcookie +213 common epoll_create sys_epoll_create +214 64 epoll_ctl_old +215 64 epoll_wait_old +216 common remap_file_pages sys_remap_file_pages +217 common getdents64 sys_getdents64 +218 common set_tid_address sys_set_tid_address +219 common restart_syscall sys_restart_syscall +220 common semtimedop sys_semtimedop +221 common fadvise64 sys_fadvise64 +222 64 timer_create sys_timer_create +223 common timer_settime sys_timer_settime +224 common timer_gettime sys_timer_gettime +225 common timer_getoverrun sys_timer_getoverrun +226 common timer_delete sys_timer_delete +227 common clock_settime sys_clock_settime +228 common clock_gettime sys_clock_gettime +229 common clock_getres sys_clock_getres +230 common clock_nanosleep sys_clock_nanosleep +231 common exit_group sys_exit_group +232 common epoll_wait sys_epoll_wait +233 common epoll_ctl sys_epoll_ctl +234 common tgkill sys_tgkill +235 common utimes sys_utimes +236 64 vserver +237 common mbind sys_mbind +238 common set_mempolicy sys_set_mempolicy +239 common get_mempolicy sys_get_mempolicy +240 common mq_open sys_mq_open +241 common mq_unlink sys_mq_unlink +242 common mq_timedsend sys_mq_timedsend +243 common mq_timedreceive sys_mq_timedreceive +244 64 mq_notify sys_mq_notify +245 common mq_getsetattr sys_mq_getsetattr +246 64 kexec_load sys_kexec_load +247 64 waitid sys_waitid +248 common add_key sys_add_key +249 common request_key sys_request_key +250 common keyctl sys_keyctl +251 common ioprio_set sys_ioprio_set +252 common ioprio_get sys_ioprio_get +253 common inotify_init sys_inotify_init +254 common inotify_add_watch sys_inotify_add_watch +255 common inotify_rm_watch sys_inotify_rm_watch +256 common migrate_pages sys_migrate_pages +257 common openat sys_openat +258 common mkdirat sys_mkdirat +259 common mknodat sys_mknodat +260 common fchownat sys_fchownat +261 common futimesat sys_futimesat +262 common newfstatat sys_newfstatat +263 common unlinkat sys_unlinkat +264 common renameat sys_renameat +265 common linkat sys_linkat +266 common symlinkat sys_symlinkat +267 common readlinkat sys_readlinkat +268 common fchmodat sys_fchmodat +269 common faccessat sys_faccessat +270 common pselect6 sys_pselect6 +271 common ppoll sys_ppoll +272 common unshare sys_unshare +273 64 set_robust_list sys_set_robust_list +274 64 get_robust_list sys_get_robust_list +275 common splice sys_splice +276 common tee sys_tee +277 common sync_file_range sys_sync_file_range +278 64 vmsplice sys_vmsplice +279 64 move_pages sys_move_pages +280 common utimensat sys_utimensat +281 common epoll_pwait sys_epoll_pwait +282 common signalfd sys_signalfd +283 common timerfd_create sys_timerfd_create +284 common eventfd sys_eventfd +285 common fallocate sys_fallocate +286 common timerfd_settime sys_timerfd_settime +287 common timerfd_gettime sys_timerfd_gettime +288 common accept4 sys_accept4 +289 common signalfd4 sys_signalfd4 +290 common eventfd2 sys_eventfd2 +291 common epoll_create1 sys_epoll_create1 +292 common dup3 sys_dup3 +293 common pipe2 sys_pipe2 +294 common inotify_init1 sys_inotify_init1 +295 64 preadv sys_preadv +296 64 pwritev sys_pwritev +297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +298 common perf_event_open sys_perf_event_open +299 64 recvmmsg sys_recvmmsg +300 common fanotify_init sys_fanotify_init +301 common fanotify_mark sys_fanotify_mark +302 common prlimit64 sys_prlimit64 +303 common name_to_handle_at sys_name_to_handle_at +304 common open_by_handle_at sys_open_by_handle_at +305 common clock_adjtime sys_clock_adjtime +306 common syncfs sys_syncfs +307 64 sendmmsg sys_sendmmsg +308 common setns sys_setns +309 common getcpu sys_getcpu +310 64 process_vm_readv sys_process_vm_readv +311 64 process_vm_writev sys_process_vm_writev +312 common kcmp sys_kcmp +313 common finit_module sys_finit_module +314 common sched_setattr sys_sched_setattr +315 common sched_getattr sys_sched_getattr +316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp +318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create +320 common kexec_file_load sys_kexec_file_load +321 common bpf sys_bpf +322 64 execveat sys_execveat/ptregs +323 common userfaultfd sys_userfaultfd +324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 +326 common copy_file_range sys_copy_file_range +327 64 preadv2 sys_preadv2 +328 64 pwritev2 sys_pwritev2 + +# +# x32-specific system call numbers start at 512 to avoid cache impact +# for native 64-bit operation. +# +512 x32 rt_sigaction compat_sys_rt_sigaction +513 x32 rt_sigreturn sys32_x32_rt_sigreturn +514 x32 ioctl compat_sys_ioctl +515 x32 readv compat_sys_readv +516 x32 writev compat_sys_writev +517 x32 recvfrom compat_sys_recvfrom +518 x32 sendmsg compat_sys_sendmsg +519 x32 recvmsg compat_sys_recvmsg +520 x32 execve compat_sys_execve/ptregs +521 x32 ptrace compat_sys_ptrace +522 x32 rt_sigpending compat_sys_rt_sigpending +523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait +524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo +525 x32 sigaltstack compat_sys_sigaltstack +526 x32 timer_create compat_sys_timer_create +527 x32 mq_notify compat_sys_mq_notify +528 x32 kexec_load compat_sys_kexec_load +529 x32 waitid compat_sys_waitid +530 x32 set_robust_list compat_sys_set_robust_list +531 x32 get_robust_list compat_sys_get_robust_list +532 x32 vmsplice compat_sys_vmsplice +533 x32 move_pages compat_sys_move_pages +534 x32 preadv compat_sys_preadv64 +535 x32 pwritev compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg compat_sys_recvmmsg +538 x32 sendmmsg compat_sys_sendmmsg +539 x32 process_vm_readv compat_sys_process_vm_readv +540 x32 process_vm_writev compat_sys_process_vm_writev +541 x32 setsockopt compat_sys_setsockopt +542 x32 getsockopt compat_sys_getsockopt +543 x32 io_setup compat_sys_io_setup +544 x32 io_submit compat_sys_io_submit +545 x32 execveat compat_sys_execveat/ptregs diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh new file mode 100755 index 000000000000..49a18b9ad9cf --- /dev/null +++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +in="$1" +arch="$2" + +syscall_macro() { + nr="$1" + name="$2" + + echo " [$nr] = \"$name\"," +} + +emit() { + nr="$1" + entry="$2" + + syscall_macro "$nr" "$entry" +} + +echo "static const char *syscalltbl_${arch}[] = {" + +sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX) +grep '^[0-9]' "$in" | sort -n > $sorted_table + +max_nr=0 +while read nr abi name entry compat; do + if [ $nr -ge 512 ] ; then # discard compat sycalls + break + fi + + emit "$nr" "$name" + max_nr=$nr +done < $sorted_table + +rm -f $sorted_table + +echo "};" + +echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}" diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 9d29ee283ac5..d4aa567a29c4 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused) CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); evsel = perf_evlist__first(evlist); diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index 9223c164e545..1f86ee8fb831 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -63,6 +63,8 @@ struct pt_regs_offset { # define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} #endif +/* TODO: switching by dwarf address size */ +#ifndef __x86_64__ static const struct pt_regs_offset x86_32_regoffset_table[] = { REG_OFFSET_NAME_32("%ax", eax), REG_OFFSET_NAME_32("%cx", ecx), @@ -75,6 +77,8 @@ static const struct pt_regs_offset x86_32_regoffset_table[] = { REG_OFFSET_END, }; +#define regoffset_table x86_32_regoffset_table +#else static const struct pt_regs_offset x86_64_regoffset_table[] = { REG_OFFSET_NAME_64("%ax", rax), REG_OFFSET_NAME_64("%dx", rdx), @@ -95,11 +99,7 @@ static const struct pt_regs_offset x86_64_regoffset_table[] = { REG_OFFSET_END, }; -/* TODO: switching by dwarf address size */ -#ifdef __x86_64__ #define regoffset_table x86_64_regoffset_table -#else -#define regoffset_table x86_32_regoffset_table #endif /* Minus 1 for the ending REG_OFFSET_END */ diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d66f9ad4df2e..7dc30637cf66 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -438,6 +438,11 @@ struct auxtrace_record *intel_bts_recording_init(int *err) if (!intel_bts_pmu) return NULL; + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + btsr = zalloc(sizeof(struct intel_bts_recording)); if (!btsr) { *err = -ENOMEM; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index a3395179c9ee..a07b9605e93b 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -1027,6 +1027,11 @@ struct auxtrace_record *intel_pt_recording_init(int *err) if (!intel_pt_pmu) return NULL; + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + ptr = zalloc(sizeof(struct intel_pt_recording)); if (!ptr) { *err = -ENOMEM; diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index fd2868490d00..357f1b13b5ae 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include "../../util/debug.h" #include "../../util/tsc.h" -#include "tsc.h" int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) @@ -46,3 +45,34 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct time_conv_event), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + + return process(tool, &event, NULL, machine); +} diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h deleted file mode 100644 index 2edc4d31065c..000000000000 --- a/tools/perf/arch/x86/util/tsc.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ -#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ - -#include <linux/types.h> - -struct perf_tsc_conversion { - u16 time_shift; - u32 time_mult; - u64 time_zero; -}; - -struct perf_event_mmap_page; - -int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, - struct perf_tsc_conversion *tc); - -#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */ diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a50df86f2b9b..579a592990dd 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -25,19 +25,17 @@ # endif #endif -extern int bench_numa(int argc, const char **argv, const char *prefix); -extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); -extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); -extern int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused); -extern int bench_mem_memset(int argc, const char **argv, const char *prefix); -extern int bench_futex_hash(int argc, const char **argv, const char *prefix); -extern int bench_futex_wake(int argc, const char **argv, const char *prefix); -extern int bench_futex_wake_parallel(int argc, const char **argv, - const char *prefix); -extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); +int bench_numa(int argc, const char **argv, const char *prefix); +int bench_sched_messaging(int argc, const char **argv, const char *prefix); +int bench_sched_pipe(int argc, const char **argv, const char *prefix); +int bench_mem_memcpy(int argc, const char **argv, const char *prefix); +int bench_mem_memset(int argc, const char **argv, const char *prefix); +int bench_futex_hash(int argc, const char **argv, const char *prefix); +int bench_futex_wake(int argc, const char **argv, const char *prefix); +int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix); +int bench_futex_requeue(int argc, const char **argv, const char *prefix); /* pi futexes */ -extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); +int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 6a18ce21f865..6952db65508a 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -83,7 +83,7 @@ static void *workerfn(void *arg) do { int ret; again: - ret = futex_lock_pi(w->futex, NULL, 0, futex_flag); + ret = futex_lock_pi(w->futex, NULL, futex_flag); if (ret) { /* handle lock acquisition */ if (!silent) diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index d44de9f44281..b2e06d1190d0 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -57,13 +57,11 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) /** * futex_lock_pi() - block on uaddr as a PI mutex - * @detect: whether (1) or not (0) to perform deadlock detection */ static inline int -futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect, - int opflags) +futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); + return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); } /** diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index a91aa85d80ff..2b54d0f2672a 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -6,6 +6,7 @@ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> */ +#include "debug.h" #include "../perf.h" #include "../util/util.h" #include <subcmd/parse-options.h> @@ -63,14 +64,16 @@ static struct perf_event_attr cycle_attr = { .config = PERF_COUNT_HW_CPU_CYCLES }; -static void init_cycles(void) +static int init_cycles(void) { cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); - if (cycles_fd < 0 && errno == ENOSYS) - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - else - BUG_ON(cycles_fd < 0); + if (cycles_fd < 0 && errno == ENOSYS) { + pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + return -1; + } + + return cycles_fd; } static u64 get_cycles(void) @@ -155,8 +158,13 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * argc = parse_options(argc, argv, options, info->usage, 0); - if (use_cycles) - init_cycles(); + if (use_cycles) { + i = init_cycles(); + if (i < 0) { + fprintf(stderr, "Failed to open cycles counter\n"); + return i; + } + } size = (size_t)perf_atoll((char *)size_str); size_total = (double)size * nr_loops; diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h index 57b4ed871459..5aad2a9408b0 100644 --- a/tools/perf/bench/mem-memcpy-arch.h +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMCPY_FN(fn, name, desc) \ - extern void *fn(void *, const void *, size_t); + void *fn(void *, const void *, size_t); #include "mem-memcpy-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h index 633800cb0dcb..0d15786d9ae3 100644 --- a/tools/perf/bench/mem-memset-arch.h +++ b/tools/perf/bench/mem-memset-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMSET_FN(fn, name, desc) \ - extern void *fn(void *, int, size_t); + void *fn(void *, int, size_t); #include "mem-memset-x86-64-asm-def.h" diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5049d6357a46..7500d959d7eb 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -293,7 +293,7 @@ static void bind_to_memnode(int node) if (node == -1) return; - BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)); + BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); nodemask = 1L << node; ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cfe366375c4b..814158393656 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -94,7 +94,7 @@ static int process_sample_event(struct perf_tool *tool, struct addr_location al; int ret = 0; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index c42448ed5dfe..fe1b77fa21f9 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -12,6 +12,7 @@ #include <subcmd/parse-options.h> #include "util/util.h" #include "util/debug.h" +#include "util/config.h" static bool use_system_config, use_user_config; @@ -32,13 +33,28 @@ static struct option config_options[] = { OPT_END() }; -static int show_config(const char *key, const char *value, - void *cb __maybe_unused) +static int show_config(struct perf_config_set *set) { - if (value) - printf("%s=%s\n", key, value); - else - printf("%s\n", key); + struct perf_config_section *section; + struct perf_config_item *item; + struct list_head *sections; + + if (set == NULL) + return -1; + + sections = &set->sections; + if (list_empty(sections)) + return -1; + + list_for_each_entry(section, sections, node) { + list_for_each_entry(item, §ion->items, node) { + char *value = item->value; + + if (value) + printf("%s.%s=%s\n", section->name, + item->name, value); + } + } return 0; } @@ -46,6 +62,7 @@ static int show_config(const char *key, const char *value, int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { int ret = 0; + struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); argc = parse_options(argc, argv, config_options, config_usage, @@ -63,13 +80,19 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) else if (use_user_config) config_exclusive_filename = user_config; + set = perf_config_set__new(); + if (!set) { + ret = -1; + goto out_err; + } + switch (actions) { case ACTION_LIST: if (argc) { pr_err("Error: takes no arguments\n"); parse_options_usage(config_usage, config_options, "l", 1); } else { - ret = perf_config(show_config, NULL); + ret = show_config(set); if (ret < 0) { const char * config_filename = config_exclusive_filename; if (!config_exclusive_filename) @@ -83,5 +106,7 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(config_usage, config_options); } + perf_config_set__delete(set); +out_err: return ret; } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 4d72359fd15a..9ce354f469dc 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -330,7 +330,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, struct hists *hists = evsel__hists(evsel); int ret = -1; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -428,7 +428,7 @@ static void hists__baseline_only(struct hists *hists) struct rb_root *root; struct rb_node *next; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -450,7 +450,7 @@ static void hists__precompute(struct hists *hists) struct rb_root *root; struct rb_node *next; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 49d55e21b1b0..f9830c902b78 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -61,6 +61,7 @@ static int check_emacsclient_version(void) struct child_process ec_process; const char *argv_ec[] = { "emacsclient", "--version", NULL }; int version; + int ret = -1; /* emacsclient prints its version number on stderr */ memset(&ec_process, 0, sizeof(ec_process)); @@ -71,7 +72,10 @@ static int check_emacsclient_version(void) fprintf(stderr, "Failed to start emacsclient.\n"); return -1; } - strbuf_read(&buffer, ec_process.err, 20); + if (strbuf_read(&buffer, ec_process.err, 20) < 0) { + fprintf(stderr, "Failed to read emacsclient version\n"); + goto out; + } close(ec_process.err); /* @@ -82,8 +86,7 @@ static int check_emacsclient_version(void) if (prefixcmp(buffer.buf, "emacsclient")) { fprintf(stderr, "Failed to parse emacsclient version.\n"); - strbuf_release(&buffer); - return -1; + goto out; } version = atoi(buffer.buf + strlen("emacsclient")); @@ -92,12 +95,11 @@ static int check_emacsclient_version(void) fprintf(stderr, "emacsclient version '%d' too old (< 22).\n", version); - strbuf_release(&buffer); - return -1; - } - + } else + ret = 0; +out: strbuf_release(&buffer); - return 0; + return ret; } static void exec_woman_emacs(const char *path, const char *page) @@ -106,12 +108,14 @@ static void exec_woman_emacs(const char *path, const char *page) if (!check_emacsclient_version()) { /* This works only with emacsclient version >= 22. */ - struct strbuf man_page = STRBUF_INIT; + char *man_page; if (!path) path = "emacsclient"; - strbuf_addf(&man_page, "(woman \"%s\")", page); - execlp(path, "emacsclient", "-e", man_page.buf, NULL); + if (asprintf(&man_page, "(woman \"%s\")", page) > 0) { + execlp(path, "emacsclient", "-e", man_page, NULL); + free(man_page); + } warning("failed to exec '%s': %s", path, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -122,7 +126,7 @@ static void exec_man_konqueror(const char *path, const char *page) const char *display = getenv("DISPLAY"); if (display && *display) { - struct strbuf man_page = STRBUF_INIT; + char *man_page; const char *filename = "kfmclient"; char sbuf[STRERR_BUFSIZE]; @@ -141,8 +145,10 @@ static void exec_man_konqueror(const char *path, const char *page) filename = file; } else path = "kfmclient"; - strbuf_addf(&man_page, "man:%s(1)", page); - execlp(path, filename, "newTab", man_page.buf, NULL); + if (asprintf(&man_page, "man:%s(1)", page) > 0) { + execlp(path, filename, "newTab", man_page, NULL); + free(man_page); + } warning("failed to exec '%s': %s", path, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -161,11 +167,13 @@ static void exec_man_man(const char *path, const char *page) static void exec_man_cmd(const char *cmd, const char *page) { - struct strbuf shell_cmd = STRBUF_INIT; char sbuf[STRERR_BUFSIZE]; + char *shell_cmd; - strbuf_addf(&shell_cmd, "%s %s", cmd, page); - execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); + if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) { + execl("/bin/sh", "sh", "-c", shell_cmd, NULL); + free(shell_cmd); + } warning("failed to exec '%s': %s", cmd, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -299,43 +307,33 @@ static int is_perf_command(const char *s) is_in_cmdlist(&other_cmds, s); } -static const char *prepend(const char *prefix, const char *cmd) -{ - size_t pre_len = strlen(prefix); - size_t cmd_len = strlen(cmd); - char *p = malloc(pre_len + cmd_len + 1); - memcpy(p, prefix, pre_len); - strcpy(p + pre_len, cmd); - return p; -} - static const char *cmd_to_page(const char *perf_cmd) { + char *s; + if (!perf_cmd) return "perf"; else if (!prefixcmp(perf_cmd, "perf")) return perf_cmd; - else - return prepend("perf-", perf_cmd); + + return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s; } static void setup_man_path(void) { - struct strbuf new_path = STRBUF_INIT; + char *new_path; const char *old_path = getenv("MANPATH"); /* We should always put ':' after our path. If there is no * old_path, the ':' at the end will let 'man' to try * system-wide paths after ours to find the manual page. If * there is old_path, we need ':' as delimiter. */ - strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); - strbuf_addch(&new_path, ':'); - if (old_path) - strbuf_addstr(&new_path, old_path); - - setenv("MANPATH", new_path.buf, 1); - - strbuf_release(&new_path); + if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path ?: "") > 0) { + setenv("MANPATH", new_path, 1); + free(new_path); + } else { + error("Unable to setup man path"); + } } static void exec_viewer(const char *name, const char *page) @@ -380,7 +378,7 @@ static int show_info_page(const char *perf_cmd) return -1; } -static int get_html_page_path(struct strbuf *page_path, const char *page) +static int get_html_page_path(char **page_path, const char *page) { struct stat st; const char *html_path = system_path(PERF_HTML_PATH); @@ -392,10 +390,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page) return -1; } - strbuf_init(page_path, 0); - strbuf_addf(page_path, "%s/%s.html", html_path, page); - - return 0; + return asprintf(page_path, "%s/%s.html", html_path, page); } /* @@ -413,12 +408,12 @@ static void open_html(const char *path) static int show_html_page(const char *perf_cmd) { const char *page = cmd_to_page(perf_cmd); - struct strbuf page_path; /* it leaks but we exec bellow */ + char *page_path; /* it leaks but we exec bellow */ - if (get_html_page_path(&page_path, page) != 0) + if (get_html_page_path(&page_path, page) < 0) return -1; - open_html(page_path.buf); + open_html(page_path); return 0; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 7fa68663ed72..e5afa8fe1bf1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -131,8 +131,7 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size) static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, union perf_event *event, - struct perf_session *session - __maybe_unused) + struct perf_session *session) { struct perf_inject *inject = container_of(tool, struct perf_inject, tool); @@ -417,9 +416,6 @@ static int perf_event__inject_buildid(struct perf_tool *tool, { struct addr_location al; struct thread *thread; - u8 cpumode; - - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) { @@ -428,7 +424,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, goto repipe; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) { if (!al.map->dso->hit) { @@ -752,6 +748,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .auxtrace_info = perf_event__repipe_op2_synth, .auxtrace = perf_event__repipe_auxtrace, .auxtrace_error = perf_event__repipe_op2_synth, + .time_conv = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index c9cb3be47cff..58adfee230de 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -375,7 +375,7 @@ static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample) } al.thread = machine__findnew_thread(machine, sample->pid, sample->tid); - sample__resolve_callchain(sample, NULL, evsel, &al, 16); + sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16); callchain_cursor_commit(&callchain_cursor); while (true) { diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index bff666458b28..6487c06d2708 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -982,7 +982,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct perf_evlist *evlist = kvm->evlist; char sbuf[STRERR_BUFSIZE]; - perf_evlist__config(evlist, &kvm->opts); + perf_evlist__config(evlist, &kvm->opts, NULL); /* * Note: exclude_{guest,host} do not apply here. diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 88aeac9aa1da..1dc140c5481d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -62,19 +62,22 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) int rec_argc, i = 0, j; const char **rec_argv; int ret; + bool all_user = false, all_kernel = false; struct option options[] = { OPT_CALLBACK('e', "event", &mem, "event", "event selector. use 'perf mem record -e list' to list available events", parse_record_events), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); - rec_argc = argc + 7; /* max number of arguments */ + rec_argc = argc + 9; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; @@ -103,6 +106,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = perf_mem_events__name(j); }; + if (all_user) + rec_argv[i++] = "--all-user"; + + if (all_kernel) + rec_argv[i++] = "--all-kernel"; + for (j = 0; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -131,7 +140,7 @@ dump_raw_samples(struct perf_tool *tool, struct addr_location al; const char *fmt; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 515510ecc76a..f3679c44d3f3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -29,10 +29,12 @@ #include "util/data.h" #include "util/perf_regs.h" #include "util/auxtrace.h" +#include "util/tsc.h" #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" +#include "util/trigger.h" #include "asm/bug.h" #include <unistd.h> @@ -55,6 +57,8 @@ struct record { bool no_buildid_cache; bool no_buildid_cache_set; bool buildid_all; + bool timestamp_filename; + bool switch_output; unsigned long long samples; }; @@ -124,9 +128,10 @@ out: static volatile int done; static volatile int signr = -1; static volatile int child_finished; -static volatile int auxtrace_snapshot_enabled; -static volatile int auxtrace_snapshot_err; + static volatile int auxtrace_record__snapshot_started; +static DEFINE_TRIGGER(auxtrace_snapshot_trigger); +static DEFINE_TRIGGER(switch_output_trigger); static void sig_handler(int sig) { @@ -244,11 +249,12 @@ static void record__read_auxtrace_snapshot(struct record *rec) { pr_debug("Recording AUX area tracing snapshot\n"); if (record__auxtrace_read_snapshot_all(rec) < 0) { - auxtrace_snapshot_err = -1; + trigger_error(&auxtrace_snapshot_trigger); } else { - auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr); - if (!auxtrace_snapshot_err) - auxtrace_snapshot_enabled = 1; + if (auxtrace_record__snapshot_finish(rec->itr)) + trigger_error(&auxtrace_snapshot_trigger); + else + trigger_ready(&auxtrace_snapshot_trigger); } } @@ -283,7 +289,7 @@ static int record__open(struct record *rec) struct record_opts *opts = &rec->opts; int rc = 0; - perf_evlist__config(evlist, opts); + perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each(evlist, pos) { try_again: @@ -494,6 +500,73 @@ record__finish_output(struct record *rec) return; } +static int record__synthesize_workload(struct record *rec) +{ + struct { + struct thread_map map; + struct thread_map_data map_data; + } thread_map; + + thread_map.map.nr = 1; + thread_map.map.map[0].pid = rec->evlist->workload.pid; + thread_map.map.map[0].comm = NULL; + return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map, + process_synthesized_event, + &rec->session->machines.host, + rec->opts.sample_address, + rec->opts.proc_map_timeout); +} + +static int record__synthesize(struct record *rec); + +static int +record__switch_output(struct record *rec, bool at_exit) +{ + struct perf_data_file *file = &rec->file; + int fd, err; + + /* Same Size: "2015122520103046"*/ + char timestamp[] = "InvalidTimestamp"; + + rec->samples = 0; + record__finish_output(rec); + err = fetch_current_timestamp(timestamp, sizeof(timestamp)); + if (err) { + pr_err("Failed to get current timestamp\n"); + return -EINVAL; + } + + fd = perf_data_file__switch(file, timestamp, + rec->session->header.data_offset, + at_exit); + if (fd >= 0 && !at_exit) { + rec->bytes_written = 0; + rec->session->header.data_size = 0; + } + + if (!quiet) + fprintf(stderr, "[ perf record: Dump %s.%s ]\n", + file->path, timestamp); + + /* Output tracking events */ + if (!at_exit) { + record__synthesize(rec); + + /* + * In 'perf record --switch-output' without -a, + * record__synthesize() in record__switch_output() won't + * generate tracking events because there's no thread_map + * in evlist. Which causes newly created perf.data doesn't + * contain map and comm information. + * Create a fake thread_map and directly call + * perf_event__synthesize_thread_map() for those events. + */ + if (target__none(&rec->opts.target)) + record__synthesize_workload(rec); + } + return fd; +} + static volatile int workload_exec_errno; /* @@ -512,6 +585,15 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); +int __weak +perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, + struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + static int record__synthesize(struct record *rec) { struct perf_session *session = rec->session; @@ -549,6 +631,11 @@ static int record__synthesize(struct record *rec) } } + err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool, + process_synthesized_event, machine); + if (err) + goto out; + if (rec->opts.full_auxtrace) { err = perf_event__synthesize_auxtrace_info(rec->itr, tool, session, process_synthesized_event); @@ -600,10 +687,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); - if (rec->opts.auxtrace_snapshot_mode) + + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) { signal(SIGUSR2, snapshot_sig_handler); - else + if (rec->opts.auxtrace_snapshot_mode) + trigger_on(&auxtrace_snapshot_trigger); + if (rec->switch_output) + trigger_on(&switch_output_trigger); + } else { signal(SIGUSR2, SIG_IGN); + } session = perf_session__new(file, false, tool); if (session == NULL) { @@ -729,27 +822,45 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_evlist__enable(rec->evlist); } - auxtrace_snapshot_enabled = 1; + trigger_ready(&auxtrace_snapshot_trigger); + trigger_ready(&switch_output_trigger); for (;;) { unsigned long long hits = rec->samples; if (record__mmap_read_all(rec) < 0) { - auxtrace_snapshot_enabled = 0; + trigger_error(&auxtrace_snapshot_trigger); + trigger_error(&switch_output_trigger); err = -1; goto out_child; } if (auxtrace_record__snapshot_started) { auxtrace_record__snapshot_started = 0; - if (!auxtrace_snapshot_err) + if (!trigger_is_error(&auxtrace_snapshot_trigger)) record__read_auxtrace_snapshot(rec); - if (auxtrace_snapshot_err) { + if (trigger_is_error(&auxtrace_snapshot_trigger)) { pr_err("AUX area tracing snapshot failed\n"); err = -1; goto out_child; } } + if (trigger_is_hit(&switch_output_trigger)) { + trigger_ready(&switch_output_trigger); + + if (!quiet) + fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", + waking); + waking = 0; + fd = record__switch_output(rec, false); + if (fd < 0) { + pr_err("Failed to switch to new file\n"); + trigger_error(&switch_output_trigger); + err = fd; + goto out_child; + } + } + if (hits == rec->samples) { if (done || draining) break; @@ -772,12 +883,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * disable events in this case. */ if (done && !disabled && !target__none(&opts->target)) { - auxtrace_snapshot_enabled = 0; + trigger_off(&auxtrace_snapshot_trigger); perf_evlist__disable(rec->evlist); disabled = true; } } - auxtrace_snapshot_enabled = 0; + trigger_off(&auxtrace_snapshot_trigger); + trigger_off(&switch_output_trigger); if (forks && workload_exec_errno) { char msg[STRERR_BUFSIZE]; @@ -811,11 +923,22 @@ out_child: /* this will be recalculated during process_buildids() */ rec->samples = 0; - if (!err) - record__finish_output(rec); + if (!err) { + if (!rec->timestamp_filename) { + record__finish_output(rec); + } else { + fd = record__switch_output(rec, true); + if (fd < 0) { + status = fd; + goto out_delete_session; + } + } + } if (!err && !quiet) { char samples[128]; + const char *postfix = rec->timestamp_filename ? + ".<timestamp>" : ""; if (rec->samples && !rec->opts.full_auxtrace) scnprintf(samples, sizeof(samples), @@ -823,9 +946,9 @@ out_child: else samples[0] = '\0'; - fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n", + fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", perf_data_file__size(file) / 1024.0 / 1024.0, - file->path, samples); + file->path, postfix, samples); } out_delete_session: @@ -833,58 +956,61 @@ out_delete_session: return status; } -static void callchain_debug(void) +static void callchain_debug(struct callchain_param *callchain) { static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; - pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); + pr_debug("callchain: type %s\n", str[callchain->record_mode]); - if (callchain_param.record_mode == CALLCHAIN_DWARF) + if (callchain->record_mode == CALLCHAIN_DWARF) pr_debug("callchain: stack dump size %d\n", - callchain_param.dump_size); + callchain->dump_size); } -int record_parse_callchain_opt(const struct option *opt, - const char *arg, - int unset) +int record_opts__parse_callchain(struct record_opts *record, + struct callchain_param *callchain, + const char *arg, bool unset) { int ret; - struct record_opts *record = (struct record_opts *)opt->value; - - record->callgraph_set = true; - callchain_param.enabled = !unset; + callchain->enabled = !unset; /* --no-call-graph */ if (unset) { - callchain_param.record_mode = CALLCHAIN_NONE; + callchain->record_mode = CALLCHAIN_NONE; pr_debug("callchain: disabled\n"); return 0; } - ret = parse_callchain_record_opt(arg, &callchain_param); + ret = parse_callchain_record_opt(arg, callchain); if (!ret) { /* Enable data address sampling for DWARF unwind. */ - if (callchain_param.record_mode == CALLCHAIN_DWARF) + if (callchain->record_mode == CALLCHAIN_DWARF) record->sample_address = true; - callchain_debug(); + callchain_debug(callchain); } return ret; } +int record_parse_callchain_opt(const struct option *opt, + const char *arg, + int unset) +{ + return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); +} + int record_callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset __maybe_unused) { - struct record_opts *record = (struct record_opts *)opt->value; + struct callchain_param *callchain = opt->value; - record->callgraph_set = true; - callchain_param.enabled = true; + callchain->enabled = true; - if (callchain_param.record_mode == CALLCHAIN_NONE) - callchain_param.record_mode = CALLCHAIN_FP; + if (callchain->record_mode == CALLCHAIN_NONE) + callchain->record_mode = CALLCHAIN_FP; - callchain_debug(); + callchain_debug(callchain); return 0; } @@ -1122,7 +1248,7 @@ struct option __record_options[] = { record__parse_mmap_pages), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_CALLBACK_NOOPT('g', NULL, &record.opts, + OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, NULL, "enables call-graph recording" , &record_callchain_opt), OPT_CALLBACK(0, "call-graph", &record.opts, @@ -1195,6 +1321,10 @@ struct option __record_options[] = { "file", "vmlinux pathname"), OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, "Record build-id of all DSOs regardless of hits"), + OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, + "append timestamp to output filename"), + OPT_BOOLEAN(0, "switch-output", &record.switch_output, + "Switch output when receive SIGUSR2"), OPT_END() }; @@ -1250,6 +1380,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } + if (rec->switch_output) + rec->timestamp_filename = true; + if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); if (err) @@ -1261,6 +1394,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (err) return err; + err = bpf__setup_stdout(rec->evlist); + if (err) { + bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Setup BPF stdout failed: %s\n", + errbuf); + return err; + } + err = -ENOMEM; symbol__init(NULL); @@ -1275,8 +1416,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" "even with a suitable vmlinux or kallsyms file.\n\n"); - if (rec->no_buildid_cache || rec->no_buildid) + if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); + } else if (rec->switch_output) { + /* + * In 'perf record --switch-output', disable buildid + * generation by default to reduce data file switching + * overhead. Still generate buildid if they are required + * explicitly using + * + * perf record --signal-trigger --no-no-buildid \ + * --no-no-buildid-cache + * + * Following code equals to: + * + * if ((rec->no_buildid || !rec->no_buildid_set) && + * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) + * disable_buildid_cache(); + */ + bool disable = true; + + if (rec->no_buildid_set && !rec->no_buildid) + disable = false; + if (rec->no_buildid_cache_set && !rec->no_buildid_cache) + disable = false; + if (disable) { + rec->no_buildid = true; + rec->no_buildid_cache = true; + disable_buildid_cache(); + } + } if (rec->evlist->nr_entries == 0 && perf_evlist__add_default(rec->evlist) < 0) { @@ -1335,9 +1504,13 @@ out_symbol_exit: static void snapshot_sig_handler(int sig __maybe_unused) { - if (!auxtrace_snapshot_enabled) - return; - auxtrace_snapshot_enabled = 0; - auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr); - auxtrace_record__snapshot_started = 1; + if (trigger_is_ready(&auxtrace_snapshot_trigger)) { + trigger_hit(&auxtrace_snapshot_trigger); + auxtrace_record__snapshot_started = 1; + if (auxtrace_record__snapshot_start(record.itr)) + trigger_error(&auxtrace_snapshot_trigger); + } + + if (trigger_is_ready(&switch_output_trigger)) + trigger_hit(&switch_output_trigger); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7eea49f9ed46..87d40e3c4078 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -41,12 +41,12 @@ #include <dlfcn.h> #include <linux/bitmap.h> +#include <linux/stringify.h> struct report { struct perf_tool tool; struct perf_session *session; bool use_tui, use_gtk, use_stdio; - bool dont_use_callchains; bool show_full_info; bool show_threads; bool inverted_callchain; @@ -154,7 +154,7 @@ static int process_sample_event(struct perf_tool *tool, }; int ret = 0; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -234,7 +234,7 @@ static int report__setup_sample_type(struct report *rep) sample_type |= PERF_SAMPLE_BRANCH_STACK; if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { - if (sort__has_parent) { + if (perf_hpp_list.parent) { ui__error("Selected --sort parent, but no " "callchain data. Did you call " "'perf record' without -g?\n"); @@ -246,7 +246,7 @@ static int report__setup_sample_type(struct report *rep) "you call 'perf record' without -g?\n"); return -1; } - } else if (!rep->dont_use_callchains && + } else if (!callchain_param.enabled && callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; @@ -598,13 +598,15 @@ static int __cmd_report(struct report *rep) static int report_parse_callchain_opt(const struct option *opt, const char *arg, int unset) { - struct report *rep = (struct report *)opt->value; + struct callchain_param *callchain = opt->value; + callchain->enabled = !unset; /* * --no-call-graph */ if (unset) { - rep->dont_use_callchains = true; + symbol_conf.use_callchain = false; + callchain->mode = CHAIN_NONE; return 0; } @@ -689,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_events = true, .ordering_requires_timestamps = true, }, - .max_stack = PERF_MAX_STACK_DEPTH, + .max_stack = sysctl_perf_event_max_stack, .pretty_printing_style = "normal", .socket_filter = -1, }; @@ -733,7 +735,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &report, + OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, "print_type,threshold[,print_limit],order,sort_key[,branch],value", report_callchain_help, &report_parse_callchain_opt, callchain_default_opt), @@ -742,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", @@ -934,7 +936,7 @@ repeat: goto error; } - sort__need_collapse = true; + perf_hpp_list.need_collapse = true; } /* Force tty output for header output and per-thread stat. */ diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 871b55ae22a4..afa057666c2a 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -11,6 +11,8 @@ #include "util/session.h" #include "util/tool.h" #include "util/cloexec.h" +#include "util/thread_map.h" +#include "util/color.h" #include <subcmd/parse-options.h> #include "util/trace-event.h" @@ -122,6 +124,21 @@ struct trace_sched_handler { struct machine *machine); }; +#define COLOR_PIDS PERF_COLOR_BLUE +#define COLOR_CPUS PERF_COLOR_BG_RED + +struct perf_sched_map { + DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); + int *comp_cpus; + bool comp; + struct thread_map *color_pids; + const char *color_pids_str; + struct cpu_map *color_cpus; + const char *color_cpus_str; + struct cpu_map *cpus; + const char *cpus_str; +}; + struct perf_sched { struct perf_tool tool; const char *sort_order; @@ -173,6 +190,7 @@ struct perf_sched { struct list_head sort_list, cmp_pid; bool force; bool skip_merge; + struct perf_sched_map map; }; static u64 get_nsecs(void) @@ -1339,6 +1357,38 @@ static int process_sched_wakeup_event(struct perf_tool *tool, return 0; } +union map_priv { + void *ptr; + bool color; +}; + +static bool thread__has_color(struct thread *thread) +{ + union map_priv priv = { + .ptr = thread__priv(thread), + }; + + return priv.color; +} + +static struct thread* +map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) +{ + struct thread *thread = machine__findnew_thread(machine, pid, tid); + union map_priv priv = { + .color = false, + }; + + if (!sched->map.color_pids || !thread || thread__priv(thread)) + return thread; + + if (thread_map__has(sched->map.color_pids, tid)) + priv.color = true; + + thread__set_priv(thread, priv.ptr); + return thread; +} + static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine) { @@ -1347,13 +1397,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, int new_shortname; u64 timestamp0, timestamp = sample->time; s64 delta; - int cpu, this_cpu = sample->cpu; + int i, this_cpu = sample->cpu; + int cpus_nr; + bool new_cpu = false; + const char *color = PERF_COLOR_NORMAL; BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); if (this_cpu > sched->max_cpu) sched->max_cpu = this_cpu; + if (sched->map.comp) { + cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); + if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) { + sched->map.comp_cpus[cpus_nr++] = this_cpu; + new_cpu = true; + } + } else + cpus_nr = sched->max_cpu; + timestamp0 = sched->cpu_last_switched[this_cpu]; sched->cpu_last_switched[this_cpu] = timestamp; if (timestamp0) @@ -1366,7 +1428,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, return -1; } - sched_in = machine__findnew_thread(machine, -1, next_pid); + sched_in = map__findnew_thread(sched, machine, -1, next_pid); if (sched_in == NULL) return -1; @@ -1400,26 +1462,52 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, new_shortname = 1; } - for (cpu = 0; cpu <= sched->max_cpu; cpu++) { + for (i = 0; i < cpus_nr; i++) { + int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; + struct thread *curr_thread = sched->curr_thread[cpu]; + const char *pid_color = color; + const char *cpu_color = color; + + if (curr_thread && thread__has_color(curr_thread)) + pid_color = COLOR_PIDS; + + if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) + continue; + + if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) + cpu_color = COLOR_CPUS; + if (cpu != this_cpu) - printf(" "); + color_fprintf(stdout, cpu_color, " "); else - printf("*"); + color_fprintf(stdout, cpu_color, "*"); if (sched->curr_thread[cpu]) - printf("%2s ", sched->curr_thread[cpu]->shortname); + color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname); else - printf(" "); + color_fprintf(stdout, color, " "); } - printf(" %12.6f secs ", (double)timestamp/1e9); + if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) + goto out; + + color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp/1e9); if (new_shortname) { - printf("%s => %s:%d\n", + const char *pid_color = color; + + if (thread__has_color(sched_in)) + pid_color = COLOR_PIDS; + + color_fprintf(stdout, pid_color, "%s => %s:%d", sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); - } else { - printf("\n"); } + if (sched->map.comp && new_cpu) + color_fprintf(stdout, color, " (CPU %d)", this_cpu); + +out: + color_fprintf(stdout, color, "\n"); + thread__put(sched_in); return 0; @@ -1675,9 +1763,75 @@ static int perf_sched__lat(struct perf_sched *sched) return 0; } +static int setup_map_cpus(struct perf_sched *sched) +{ + struct cpu_map *map; + + sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); + + if (sched->map.comp) { + sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); + if (!sched->map.comp_cpus) + return -1; + } + + if (!sched->map.cpus_str) + return 0; + + map = cpu_map__new(sched->map.cpus_str); + if (!map) { + pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); + return -1; + } + + sched->map.cpus = map; + return 0; +} + +static int setup_color_pids(struct perf_sched *sched) +{ + struct thread_map *map; + + if (!sched->map.color_pids_str) + return 0; + + map = thread_map__new_by_tid_str(sched->map.color_pids_str); + if (!map) { + pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); + return -1; + } + + sched->map.color_pids = map; + return 0; +} + +static int setup_color_cpus(struct perf_sched *sched) +{ + struct cpu_map *map; + + if (!sched->map.color_cpus_str) + return 0; + + map = cpu_map__new(sched->map.color_cpus_str); + if (!map) { + pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); + return -1; + } + + sched->map.color_cpus = map; + return 0; +} + static int perf_sched__map(struct perf_sched *sched) { - sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); + if (setup_map_cpus(sched)) + return -1; + + if (setup_color_pids(sched)) + return -1; + + if (setup_color_cpus(sched)) + return -1; setup_pager(); if (perf_sched__read_events(sched)) @@ -1831,6 +1985,17 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "dump raw trace in ASCII"), OPT_END() }; + const struct option map_options[] = { + OPT_BOOLEAN(0, "compact", &sched.map.comp, + "map output in compact mode"), + OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", + "highlight given pids in map"), + OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", + "highlight given CPUs in map"), + OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", + "display given CPUs in map"), + OPT_END() + }; const char * const latency_usage[] = { "perf sched latency [<options>]", NULL @@ -1839,6 +2004,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "perf sched replay [<options>]", NULL }; + const char * const map_usage[] = { + "perf sched map [<options>]", + NULL + }; const char *const sched_subcommands[] = { "record", "latency", "map", "replay", "script", NULL }; const char *sched_usage[] = { @@ -1887,6 +2056,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) setup_sorting(&sched, latency_options, latency_usage); return perf_sched__lat(&sched); } else if (!strcmp(argv[0], "map")) { + if (argc) { + argc = parse_options(argc, argv, map_options, map_usage, 0); + if (argc) + usage_with_options(map_usage, map_options); + } sched.tp_handler = &map_ops; setup_sorting(&sched, latency_options, latency_usage); return perf_sched__map(&sched); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 57f9a7e7f7d3..efca81679bb3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -22,6 +22,7 @@ #include "util/thread_map.h" #include "util/stat.h" #include <linux/bitmap.h> +#include <linux/stringify.h> #include "asm/bug.h" #include "util/mem-events.h" @@ -317,19 +318,19 @@ static void set_print_ip_opts(struct perf_event_attr *attr) output[type].print_ip_opts = 0; if (PRINT_FIELD(IP)) - output[type].print_ip_opts |= PRINT_IP_OPT_IP; + output[type].print_ip_opts |= EVSEL__PRINT_IP; if (PRINT_FIELD(SYM)) - output[type].print_ip_opts |= PRINT_IP_OPT_SYM; + output[type].print_ip_opts |= EVSEL__PRINT_SYM; if (PRINT_FIELD(DSO)) - output[type].print_ip_opts |= PRINT_IP_OPT_DSO; + output[type].print_ip_opts |= EVSEL__PRINT_DSO; if (PRINT_FIELD(SYMOFFSET)) - output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET; + output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET; if (PRINT_FIELD(SRCLINE)) - output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE; + output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE; } /* @@ -405,9 +406,7 @@ out: return 0; } -static void print_sample_iregs(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, +static void print_sample_iregs(struct perf_sample *sample, struct perf_event_attr *attr) { struct regs_dump *regs = &sample->intr_regs; @@ -476,10 +475,7 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 'P' : 'M'; } -static void print_sample_brstack(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, - struct perf_event_attr *attr __maybe_unused) +static void print_sample_brstack(struct perf_sample *sample) { struct branch_stack *br = sample->branch_stack; u64 i; @@ -498,14 +494,11 @@ static void print_sample_brstack(union perf_event *event __maybe_unused, } } -static void print_sample_brstacksym(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, - struct perf_event_attr *attr __maybe_unused) +static void print_sample_brstacksym(struct perf_sample *sample, + struct thread *thread) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; u64 i, from, to; if (!(br && br->nr)) @@ -518,11 +511,11 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused, from = br->entries[i].from; to = br->entries[i].to; - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); if (alf.map) alf.sym = map__find_symbol(alf.map, alf.addr, NULL); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); if (alt.map) alt.sym = map__find_symbol(alt.map, alt.addr, NULL); @@ -538,8 +531,7 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused, } -static void print_sample_addr(union perf_event *event, - struct perf_sample *sample, +static void print_sample_addr(struct perf_sample *sample, struct thread *thread, struct perf_event_attr *attr) { @@ -550,7 +542,7 @@ static void print_sample_addr(union perf_event *event, if (!sample_addr_correlates_sym(attr)) return; - perf_event__preprocess_sample_addr(event, sample, thread, &al); + thread__resolve(thread, &al, sample); if (PRINT_FIELD(SYM)) { printf(" "); @@ -567,8 +559,7 @@ static void print_sample_addr(union perf_event *event, } } -static void print_sample_bts(union perf_event *event, - struct perf_sample *sample, +static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, struct addr_location *al) @@ -579,18 +570,23 @@ static void print_sample_bts(union perf_event *event, /* print branch_from information */ if (PRINT_FIELD(IP)) { unsigned int print_opts = output[attr->type].print_ip_opts; + struct callchain_cursor *cursor = NULL; - if (symbol_conf.use_callchain && sample->callchain) { - printf("\n"); - } else { - printf(" "); - if (print_opts & PRINT_IP_OPT_SRCLINE) { + if (symbol_conf.use_callchain && sample->callchain && + thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + sample, NULL, NULL, scripting_max_stack) == 0) + cursor = &callchain_cursor; + + if (cursor == NULL) { + putchar(' '); + if (print_opts & EVSEL__PRINT_SRCLINE) { print_srcline_last = true; - print_opts &= ~PRINT_IP_OPT_SRCLINE; + print_opts &= ~EVSEL__PRINT_SRCLINE; } - } - perf_evsel__print_ip(evsel, sample, al, print_opts, - scripting_max_stack); + } else + putchar('\n'); + + sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout); } /* print branch_to information */ @@ -598,7 +594,7 @@ static void print_sample_bts(union perf_event *event, ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && !output[attr->type].user_set)) { printf(" => "); - print_sample_addr(event, sample, thread, attr); + print_sample_addr(sample, thread, attr); } if (print_srcline_last) @@ -747,7 +743,7 @@ static size_t data_src__printf(u64 data_src) return printf("%-*s", maxlen, out); } -static void process_event(struct perf_script *script, union perf_event *event, +static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { @@ -776,7 +772,7 @@ static void process_event(struct perf_script *script, union perf_event *event, print_sample_flags(sample->flags); if (is_bts_event(attr)) { - print_sample_bts(event, sample, evsel, thread, al); + print_sample_bts(sample, evsel, thread, al); return; } @@ -784,7 +780,7 @@ static void process_event(struct perf_script *script, union perf_event *event, event_format__print(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) - print_sample_addr(event, sample, thread, attr); + print_sample_addr(sample, thread, attr); if (PRINT_FIELD(DATA_SRC)) data_src__printf(sample->data_src); @@ -793,23 +789,24 @@ static void process_event(struct perf_script *script, union perf_event *event, printf("%16" PRIu64, sample->weight); if (PRINT_FIELD(IP)) { - if (!symbol_conf.use_callchain) - printf(" "); - else - printf("\n"); + struct callchain_cursor *cursor = NULL; + + if (symbol_conf.use_callchain && sample->callchain && + thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + sample, NULL, NULL, scripting_max_stack) == 0) + cursor = &callchain_cursor; - perf_evsel__print_ip(evsel, sample, al, - output[attr->type].print_ip_opts, - scripting_max_stack); + putchar(cursor ? '\n' : ' '); + sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout); } if (PRINT_FIELD(IREGS)) - print_sample_iregs(event, sample, thread, attr); + print_sample_iregs(sample, attr); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(event, sample, thread, attr); + print_sample_brstack(sample); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(event, sample, thread, attr); + print_sample_brstacksym(sample, thread); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); @@ -905,7 +902,7 @@ static int process_sample_event(struct perf_tool *tool, return 0; } - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_err("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -920,7 +917,7 @@ static int process_sample_event(struct perf_tool *tool, if (scripting_ops) scripting_ops->process_event(event, sample, evsel, &al); else - process_event(scr, event, sample, evsel, &al); + process_event(scr, sample, evsel, &al); out_put: addr_location__put(&al); @@ -1425,21 +1422,19 @@ static int is_directory(const char *base_path, const struct dirent *dent) return S_ISDIR(st.st_mode); } -#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\ - while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ - lang_next) \ - if ((lang_dirent.d_type == DT_DIR || \ - (lang_dirent.d_type == DT_UNKNOWN && \ - is_directory(scripts_path, &lang_dirent))) && \ - (strcmp(lang_dirent.d_name, ".")) && \ - (strcmp(lang_dirent.d_name, ".."))) +#define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ + while ((lang_dirent = readdir(scripts_dir)) != NULL) \ + if ((lang_dirent->d_type == DT_DIR || \ + (lang_dirent->d_type == DT_UNKNOWN && \ + is_directory(scripts_path, lang_dirent))) && \ + (strcmp(lang_dirent->d_name, ".")) && \ + (strcmp(lang_dirent->d_name, ".."))) -#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\ - while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ - script_next) \ - if (script_dirent.d_type != DT_DIR && \ - (script_dirent.d_type != DT_UNKNOWN || \ - !is_directory(lang_path, &script_dirent))) +#define for_each_script(lang_path, lang_dir, script_dirent) \ + while ((script_dirent = readdir(lang_dir)) != NULL) \ + if (script_dirent->d_type != DT_DIR && \ + (script_dirent->d_type != DT_UNKNOWN || \ + !is_directory(lang_path, script_dirent))) #define RECORD_SUFFIX "-record" @@ -1585,7 +1580,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset __maybe_unused) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; char script_path[MAXPATHLEN]; @@ -1600,19 +1595,19 @@ static int list_available_scripts(const struct option *opt __maybe_unused, if (!scripts_dir) return -1; - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { - script_root = get_script_root(&script_dirent, REPORT_SUFFIX); + for_each_script(lang_path, lang_dir, script_dirent) { + script_root = get_script_root(script_dirent, REPORT_SUFFIX); if (script_root) { desc = script_desc__findnew(script_root); snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent.d_name); + lang_path, script_dirent->d_name); read_script_info(desc, script_path); free(script_root); } @@ -1700,7 +1695,7 @@ static int check_ev_match(char *dir_name, char *scriptname, */ int find_scripts(char **scripts_array, char **scripts_path_array) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; struct perf_session *session; @@ -1723,9 +1718,9 @@ int find_scripts(char **scripts_array, char **scripts_path_array) return -1; } - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); #ifdef NO_LIBPERL if (strstr(lang_path, "perl")) continue; @@ -1739,16 +1734,16 @@ int find_scripts(char **scripts_array, char **scripts_path_array) if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { + for_each_script(lang_path, lang_dir, script_dirent) { /* Skip those real time scripts: xxxtop.p[yl] */ - if (strstr(script_dirent.d_name, "top.")) + if (strstr(script_dirent->d_name, "top.")) continue; sprintf(scripts_path_array[i], "%s/%s", lang_path, - script_dirent.d_name); - temp = strchr(script_dirent.d_name, '.'); + script_dirent->d_name); + temp = strchr(script_dirent->d_name, '.'); snprintf(scripts_array[i], - (temp - script_dirent.d_name) + 1, - "%s", script_dirent.d_name); + (temp - script_dirent->d_name) + 1, + "%s", script_dirent->d_name); if (check_ev_match(lang_path, scripts_array[i], session)) @@ -1766,7 +1761,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array) static char *get_script_path(const char *script_root, const char *suffix) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN]; char script_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; @@ -1779,21 +1774,21 @@ static char *get_script_path(const char *script_root, const char *suffix) if (!scripts_dir) return NULL; - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { - __script_root = get_script_root(&script_dirent, suffix); + for_each_script(lang_path, lang_dir, script_dirent) { + __script_root = get_script_root(script_dirent, suffix); if (__script_root && !strcmp(script_root, __script_root)) { free(__script_root); closedir(lang_dir); closedir(scripts_dir); snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent.d_name); + lang_path, script_dirent->d_name); return strdup(script_path); } free(__script_root); @@ -1971,6 +1966,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .exit = perf_event__process_exit, .fork = perf_event__process_fork, .attr = process_attr, + .event_update = perf_event__process_event_update, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, @@ -2032,6 +2028,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "only consider symbols in these pids"), OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", "only consider symbols in these tids"), + OPT_UINTEGER(0, "max-stack", &scripting_max_stack, + "Set the maximum stack depth when parsing the callchain, " + "anything beyond the specified depth will be ignored. " + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -2067,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + scripting_max_stack = sysctl_perf_event_max_stack; + setup_scripting(); argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1f19f2f999c8..e459b685a4e9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -298,6 +298,14 @@ static int read_counter(struct perf_evsel *counter) return -1; } } + + if (verbose > 1) { + fprintf(stat_config.output, + "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), + cpu, + count->val, count->ena, count->run); + } } } @@ -528,6 +536,7 @@ static int __run_perf_stat(int argc, const char **argv) perf_evlist__set_leader(evsel_list); evlist__for_each(evsel_list, counter) { +try_again: if (create_perf_stat_counter(counter) < 0) { /* * PPC returns ENXIO for HW counters until 2.6.37 @@ -544,7 +553,11 @@ static int __run_perf_stat(int argc, const char **argv) if ((counter->leader != counter) || !(counter->leader->nr_members > 1)) continue; - } + } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (verbose) + ui__warning("%s\n", msg); + goto try_again; + } perf_evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index bd7a7757176f..40cc9bb3506c 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -489,7 +489,7 @@ static const char *cat_backtrace(union perf_event *event, if (!chain) goto exit; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); goto exit; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 94af190f6843..1793da585676 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -67,6 +67,7 @@ #include <sys/utsname.h> #include <sys/mman.h> +#include <linux/stringify.h> #include <linux/types.h> static volatile int done; @@ -687,7 +688,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct perf_evsel *evsel = iter->evsel; - if (sort__has_sym && single) + if (perf_hpp_list.sym && single) perf_top__record_precise_ip(top, he, evsel->idx, al->addr); hist__account_cycles(iter->sample->branch_stack, al, iter->sample, @@ -728,7 +729,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) top->exact_samples++; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) + if (machine__resolve(machine, &al, sample) < 0) return; if (!top->kptr_restrict_warned && @@ -809,7 +810,6 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) struct perf_session *session = top->session; union perf_event *event; struct machine *machine; - u8 origin; int ret; while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { @@ -822,12 +822,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) evsel = perf_evlist__id2evsel(session->evlist, sample.id); assert(evsel != NULL); - origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (event->header.type == PERF_RECORD_SAMPLE) ++top->samples; - switch (origin) { + switch (sample.cpumode) { case PERF_RECORD_MISC_USER: ++top->us_samples; if (top->hide_user_symbols) @@ -888,7 +886,7 @@ static int perf_top__start_counters(struct perf_top *top) struct perf_evlist *evlist = top->evlist; struct record_opts *opts = &top->record_opts; - perf_evlist__config(evlist, opts); + perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each(evlist, counter) { try_again: @@ -919,15 +917,15 @@ out_err: return -1; } -static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused) +static int callchain_param__setup_sample_type(struct callchain_param *callchain) { - if (!sort__has_sym) { - if (symbol_conf.use_callchain) { + if (!perf_hpp_list.sym) { + if (callchain->enabled) { ui__error("Selected -g but \"sym\" not present in --sort/-s."); return -EINVAL; } - } else if (callchain_param.mode != CHAIN_NONE) { - if (callchain_register_param(&callchain_param) < 0) { + } else if (callchain->mode != CHAIN_NONE) { + if (callchain_register_param(callchain) < 0) { ui__error("Can't register callchain params.\n"); return -EINVAL; } @@ -954,7 +952,7 @@ static int __cmd_top(struct perf_top *top) goto out_delete; } - ret = perf_top__setup_sample_type(top); + ret = callchain_param__setup_sample_type(&callchain_param); if (ret) goto out_delete; @@ -964,7 +962,7 @@ static int __cmd_top(struct perf_top *top) machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, opts->proc_map_timeout); - if (sort__has_socket) { + if (perf_hpp_list.socket) { ret = perf_env__read_cpu_topology_map(&perf_env); if (ret < 0) goto out_err_cpu_topo; @@ -1047,18 +1045,17 @@ callchain_opt(const struct option *opt, const char *arg, int unset) static int parse_callchain_opt(const struct option *opt, const char *arg, int unset) { - struct record_opts *record = (struct record_opts *)opt->value; + struct callchain_param *callchain = opt->value; - record->callgraph_set = true; - callchain_param.enabled = !unset; - callchain_param.record_mode = CALLCHAIN_FP; + callchain->enabled = !unset; + callchain->record_mode = CALLCHAIN_FP; /* * --no-call-graph */ if (unset) { symbol_conf.use_callchain = false; - callchain_param.record_mode = CALLCHAIN_NONE; + callchain->record_mode = CALLCHAIN_NONE; return 0; } @@ -1106,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) }, .proc_map_timeout = 500, }, - .max_stack = PERF_MAX_STACK_DEPTH, + .max_stack = sysctl_perf_event_max_stack, .sym_pcnt_filter = 5, }; struct record_opts *opts = &top.record_opts; @@ -1164,17 +1161,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "output field(s): overhead, period, sample plus all of sort keys"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts, + OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, NULL, "enables call-graph recording and display", &callchain_opt), - OPT_CALLBACK(0, "call-graph", &top.record_opts, + OPT_CALLBACK(0, "call-graph", &callchain_param, "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]", top_callchain_help, &parse_callchain_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, "Set the maximum stack depth when parsing the callchain. " - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), @@ -1258,7 +1255,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ - sort__need_collapse = 1; + perf_hpp_list.need_collapse = 1; if (top.use_stdio) use_browser = 0; @@ -1314,7 +1311,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) top.sym_evsel = perf_evlist__first(top.evlist); - if (!symbol_conf.use_callchain) { + if (!callchain_param.enabled) { symbol_conf.cumulate_callchain = false; perf_hpp__cancel_cumulate(); } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8dc98c598b1a..6e5c325148e4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -34,79 +34,76 @@ #include "trace-event.h" #include "util/parse-events.h" #include "util/bpf-loader.h" +#include "callchain.h" +#include "syscalltbl.h" +#include "rb_resort.h" -#include <libaudit.h> +#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */ #include <stdlib.h> -#include <sys/mman.h> -#include <linux/futex.h> #include <linux/err.h> - -/* For older distros: */ -#ifndef MAP_STACK -# define MAP_STACK 0x20000 -#endif - -#ifndef MADV_HWPOISON -# define MADV_HWPOISON 100 - -#endif - -#ifndef MADV_MERGEABLE -# define MADV_MERGEABLE 12 -#endif - -#ifndef MADV_UNMERGEABLE -# define MADV_UNMERGEABLE 13 -#endif - -#ifndef EFD_SEMAPHORE -# define EFD_SEMAPHORE 1 -#endif - -#ifndef EFD_NONBLOCK -# define EFD_NONBLOCK 00004000 -#endif - -#ifndef EFD_CLOEXEC -# define EFD_CLOEXEC 02000000 -#endif +#include <linux/filter.h> +#include <linux/audit.h> +#include <sys/ptrace.h> +#include <linux/random.h> +#include <linux/stringify.h> #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif -#ifndef SOCK_DCCP -# define SOCK_DCCP 6 -#endif - -#ifndef SOCK_CLOEXEC -# define SOCK_CLOEXEC 02000000 -#endif - -#ifndef SOCK_NONBLOCK -# define SOCK_NONBLOCK 00004000 -#endif - -#ifndef MSG_CMSG_CLOEXEC -# define MSG_CMSG_CLOEXEC 0x40000000 -#endif - -#ifndef PERF_FLAG_FD_NO_GROUP -# define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#endif - -#ifndef PERF_FLAG_FD_OUTPUT -# define PERF_FLAG_FD_OUTPUT (1UL << 1) -#endif - -#ifndef PERF_FLAG_PID_CGROUP -# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#endif - -#ifndef PERF_FLAG_FD_CLOEXEC -# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ -#endif - +struct trace { + struct perf_tool tool; + struct syscalltbl *sctbl; + struct { + int max; + struct syscall *table; + struct { + struct perf_evsel *sys_enter, + *sys_exit; + } events; + } syscalls; + struct record_opts opts; + struct perf_evlist *evlist; + struct machine *host; + struct thread *current; + u64 base_time; + FILE *output; + unsigned long nr_events; + struct strlist *ev_qualifier; + struct { + size_t nr; + int *entries; + } ev_qualifier_ids; + struct intlist *tid_list; + struct intlist *pid_list; + struct { + size_t nr; + pid_t *entries; + } filter_pids; + double duration_filter; + double runtime_ms; + struct { + u64 vfs_getname, + proc_getname; + } stats; + unsigned int max_stack; + unsigned int min_stack; + bool not_ev_qualifier; + bool live; + bool full_time; + bool sched; + bool multiple_threads; + bool summary; + bool summary_only; + bool show_comm; + bool show_tool_stats; + bool trace_syscalls; + bool kernel_syscallchains; + bool force; + bool vfs_getname; + int trace_pgfaults; + int open_id; +}; struct tp_field { int offset; @@ -371,221 +368,6 @@ static size_t syscall_arg__scnprintf_int(char *bf, size_t size, #define SCA_INT syscall_arg__scnprintf_int -static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, prot = arg->val; - - if (prot == PROT_NONE) - return scnprintf(bf, size, "NONE"); -#define P_MMAP_PROT(n) \ - if (prot & PROT_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - prot &= ~PROT_##n; \ - } - - P_MMAP_PROT(EXEC); - P_MMAP_PROT(READ); - P_MMAP_PROT(WRITE); -#ifdef PROT_SEM - P_MMAP_PROT(SEM); -#endif - P_MMAP_PROT(GROWSDOWN); - P_MMAP_PROT(GROWSUP); -#undef P_MMAP_PROT - - if (prot) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); - - return printed; -} - -#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot - -static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - -#define P_MMAP_FLAG(n) \ - if (flags & MAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MAP_##n; \ - } - - P_MMAP_FLAG(SHARED); - P_MMAP_FLAG(PRIVATE); -#ifdef MAP_32BIT - P_MMAP_FLAG(32BIT); -#endif - P_MMAP_FLAG(ANONYMOUS); - P_MMAP_FLAG(DENYWRITE); - P_MMAP_FLAG(EXECUTABLE); - P_MMAP_FLAG(FILE); - P_MMAP_FLAG(FIXED); - P_MMAP_FLAG(GROWSDOWN); -#ifdef MAP_HUGETLB - P_MMAP_FLAG(HUGETLB); -#endif - P_MMAP_FLAG(LOCKED); - P_MMAP_FLAG(NONBLOCK); - P_MMAP_FLAG(NORESERVE); - P_MMAP_FLAG(POPULATE); - P_MMAP_FLAG(STACK); -#ifdef MAP_UNINITIALIZED - P_MMAP_FLAG(UNINITIALIZED); -#endif -#undef P_MMAP_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags - -static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - -#define P_MREMAP_FLAG(n) \ - if (flags & MREMAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MREMAP_##n; \ - } - - P_MREMAP_FLAG(MAYMOVE); -#ifdef MREMAP_FIXED - P_MREMAP_FLAG(FIXED); -#endif -#undef P_MREMAP_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags - -static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, - struct syscall_arg *arg) -{ - int behavior = arg->val; - - switch (behavior) { -#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) - P_MADV_BHV(NORMAL); - P_MADV_BHV(RANDOM); - P_MADV_BHV(SEQUENTIAL); - P_MADV_BHV(WILLNEED); - P_MADV_BHV(DONTNEED); - P_MADV_BHV(REMOVE); - P_MADV_BHV(DONTFORK); - P_MADV_BHV(DOFORK); - P_MADV_BHV(HWPOISON); -#ifdef MADV_SOFT_OFFLINE - P_MADV_BHV(SOFT_OFFLINE); -#endif - P_MADV_BHV(MERGEABLE); - P_MADV_BHV(UNMERGEABLE); -#ifdef MADV_HUGEPAGE - P_MADV_BHV(HUGEPAGE); -#endif -#ifdef MADV_NOHUGEPAGE - P_MADV_BHV(NOHUGEPAGE); -#endif -#ifdef MADV_DONTDUMP - P_MADV_BHV(DONTDUMP); -#endif -#ifdef MADV_DODUMP - P_MADV_BHV(DODUMP); -#endif -#undef P_MADV_PHV - default: break; - } - - return scnprintf(bf, size, "%#x", behavior); -} - -#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior - -static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, op = arg->val; - - if (op == 0) - return scnprintf(bf, size, "NONE"); -#define P_CMD(cmd) \ - if ((op & LOCK_##cmd) == LOCK_##cmd) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ - op &= ~LOCK_##cmd; \ - } - - P_CMD(SH); - P_CMD(EX); - P_CMD(NB); - P_CMD(UN); - P_CMD(MAND); - P_CMD(RW); - P_CMD(READ); - P_CMD(WRITE); -#undef P_OP - - if (op) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); - - return printed; -} - -#define SCA_FLOCK syscall_arg__scnprintf_flock - -static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) -{ - enum syscall_futex_args { - SCF_UADDR = (1 << 0), - SCF_OP = (1 << 1), - SCF_VAL = (1 << 2), - SCF_TIMEOUT = (1 << 3), - SCF_UADDR2 = (1 << 4), - SCF_VAL3 = (1 << 5), - }; - int op = arg->val; - int cmd = op & FUTEX_CMD_MASK; - size_t printed = 0; - - switch (cmd) { -#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); - P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; - P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; - P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; - P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; - P_FUTEX_OP(WAKE_OP); break; - P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; - P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; - P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; - P_FUTEX_OP(WAIT_REQUEUE_PI); break; - default: printed = scnprintf(bf, size, "%#x", cmd); break; - } - - if (op & FUTEX_PRIVATE_FLAG) - printed += scnprintf(bf + printed, size - printed, "|PRIV"); - - if (op & FUTEX_CLOCK_REALTIME) - printed += scnprintf(bf + printed, size - printed, "|CLKRT"); - - return printed; -} - -#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op - static const char *bpf_cmd[] = { "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", "MAP_GET_NEXT_KEY", "PROG_LOAD", @@ -652,110 +434,6 @@ static const char *socket_families[] = { }; static DEFINE_STRARRAY(socket_families); -#ifndef SOCK_TYPE_MASK -#define SOCK_TYPE_MASK 0xf -#endif - -static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, - struct syscall_arg *arg) -{ - size_t printed; - int type = arg->val, - flags = type & ~SOCK_TYPE_MASK; - - type &= SOCK_TYPE_MASK; - /* - * Can't use a strarray, MIPS may override for ABI reasons. - */ - switch (type) { -#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; - P_SK_TYPE(STREAM); - P_SK_TYPE(DGRAM); - P_SK_TYPE(RAW); - P_SK_TYPE(RDM); - P_SK_TYPE(SEQPACKET); - P_SK_TYPE(DCCP); - P_SK_TYPE(PACKET); -#undef P_SK_TYPE - default: - printed = scnprintf(bf, size, "%#x", type); - } - -#define P_SK_FLAG(n) \ - if (flags & SOCK_##n) { \ - printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ - flags &= ~SOCK_##n; \ - } - - P_SK_FLAG(CLOEXEC); - P_SK_FLAG(NONBLOCK); -#undef P_SK_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "|%#x", flags); - - return printed; -} - -#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type - -#ifndef MSG_PROBE -#define MSG_PROBE 0x10 -#endif -#ifndef MSG_WAITFORONE -#define MSG_WAITFORONE 0x10000 -#endif -#ifndef MSG_SENDPAGE_NOTLAST -#define MSG_SENDPAGE_NOTLAST 0x20000 -#endif -#ifndef MSG_FASTOPEN -#define MSG_FASTOPEN 0x20000000 -#endif - -static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (flags == 0) - return scnprintf(bf, size, "NONE"); -#define P_MSG_FLAG(n) \ - if (flags & MSG_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MSG_##n; \ - } - - P_MSG_FLAG(OOB); - P_MSG_FLAG(PEEK); - P_MSG_FLAG(DONTROUTE); - P_MSG_FLAG(TRYHARD); - P_MSG_FLAG(CTRUNC); - P_MSG_FLAG(PROBE); - P_MSG_FLAG(TRUNC); - P_MSG_FLAG(DONTWAIT); - P_MSG_FLAG(EOR); - P_MSG_FLAG(WAITALL); - P_MSG_FLAG(FIN); - P_MSG_FLAG(SYN); - P_MSG_FLAG(CONFIRM); - P_MSG_FLAG(RST); - P_MSG_FLAG(ERRQUEUE); - P_MSG_FLAG(NOSIGNAL); - P_MSG_FLAG(MORE); - P_MSG_FLAG(WAITFORONE); - P_MSG_FLAG(SENDPAGE_NOTLAST); - P_MSG_FLAG(FASTOPEN); - P_MSG_FLAG(CMSG_CLOEXEC); -#undef P_MSG_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags - static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, struct syscall_arg *arg) { @@ -788,116 +466,6 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, #define SCA_FILENAME syscall_arg__scnprintf_filename -static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (!(flags & O_CREAT)) - arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ - - if (flags == 0) - return scnprintf(bf, size, "RDONLY"); -#define P_FLAG(n) \ - if (flags & O_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~O_##n; \ - } - - P_FLAG(APPEND); - P_FLAG(ASYNC); - P_FLAG(CLOEXEC); - P_FLAG(CREAT); - P_FLAG(DIRECT); - P_FLAG(DIRECTORY); - P_FLAG(EXCL); - P_FLAG(LARGEFILE); - P_FLAG(NOATIME); - P_FLAG(NOCTTY); -#ifdef O_NONBLOCK - P_FLAG(NONBLOCK); -#elif O_NDELAY - P_FLAG(NDELAY); -#endif -#ifdef O_PATH - P_FLAG(PATH); -#endif - P_FLAG(RDWR); -#ifdef O_DSYNC - if ((flags & O_SYNC) == O_SYNC) - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); - else { - P_FLAG(DSYNC); - } -#else - P_FLAG(SYNC); -#endif - P_FLAG(TRUNC); - P_FLAG(WRONLY); -#undef P_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags - -static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (flags == 0) - return 0; - -#define P_FLAG(n) \ - if (flags & PERF_FLAG_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~PERF_FLAG_##n; \ - } - - P_FLAG(FD_NO_GROUP); - P_FLAG(FD_OUTPUT); - P_FLAG(PID_CGROUP); - P_FLAG(FD_CLOEXEC); -#undef P_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags - -static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (flags == 0) - return scnprintf(bf, size, "NONE"); -#define P_FLAG(n) \ - if (flags & EFD_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~EFD_##n; \ - } - - P_FLAG(SEMAPHORE); - P_FLAG(CLOEXEC); - P_FLAG(NONBLOCK); -#undef P_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags - static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -921,59 +489,6 @@ static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags -static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) -{ - int sig = arg->val; - - switch (sig) { -#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) - P_SIGNUM(HUP); - P_SIGNUM(INT); - P_SIGNUM(QUIT); - P_SIGNUM(ILL); - P_SIGNUM(TRAP); - P_SIGNUM(ABRT); - P_SIGNUM(BUS); - P_SIGNUM(FPE); - P_SIGNUM(KILL); - P_SIGNUM(USR1); - P_SIGNUM(SEGV); - P_SIGNUM(USR2); - P_SIGNUM(PIPE); - P_SIGNUM(ALRM); - P_SIGNUM(TERM); - P_SIGNUM(CHLD); - P_SIGNUM(CONT); - P_SIGNUM(STOP); - P_SIGNUM(TSTP); - P_SIGNUM(TTIN); - P_SIGNUM(TTOU); - P_SIGNUM(URG); - P_SIGNUM(XCPU); - P_SIGNUM(XFSZ); - P_SIGNUM(VTALRM); - P_SIGNUM(PROF); - P_SIGNUM(WINCH); - P_SIGNUM(IO); - P_SIGNUM(PWR); - P_SIGNUM(SYS); -#ifdef SIGEMT - P_SIGNUM(EMT); -#endif -#ifdef SIGSTKFLT - P_SIGNUM(STKFLT); -#endif -#ifdef SIGSWI - P_SIGNUM(SWI); -#endif - default: break; - } - - return scnprintf(bf, size, "%#x", sig); -} - -#define SCA_SIGNUM syscall_arg__scnprintf_signum - #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -1001,16 +516,62 @@ static const char *tioctls[] = { static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); #endif /* defined(__i386__) || defined(__x86_64__) */ +#ifndef GRND_NONBLOCK +#define GRND_NONBLOCK 0x0001 +#endif +#ifndef GRND_RANDOM +#define GRND_RANDOM 0x0002 +#endif + +static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & GRND_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~GRND_##n; \ + } + + P_FLAG(RANDOM); + P_FLAG(NONBLOCK); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags + #define STRARRAY(arg, name, array) \ .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ .arg_parm = { [arg] = &strarray__##array, } +#include "trace/beauty/eventfd.c" +#include "trace/beauty/flock.c" +#include "trace/beauty/futex_op.c" +#include "trace/beauty/mmap.c" +#include "trace/beauty/mode_t.c" +#include "trace/beauty/msg_flags.c" +#include "trace/beauty/open_flags.c" +#include "trace/beauty/perf_event_open.c" +#include "trace/beauty/pid.c" +#include "trace/beauty/sched_policy.c" +#include "trace/beauty/seccomp.c" +#include "trace/beauty/signum.c" +#include "trace/beauty/socket_type.c" +#include "trace/beauty/waitid_options.c" + static struct syscall_fmt { const char *name; const char *alias; size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); void *arg_parm[6]; bool errmsg; + bool errpid; bool timeout; bool hexret; } syscall_fmts[] = { @@ -1028,6 +589,7 @@ static struct syscall_fmt { { .name = "chroot", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, + { .name = "clone", .errpid = true, }, { .name = "close", .errmsg = true, .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, @@ -1093,6 +655,11 @@ static struct syscall_fmt { { .name = "getdents64", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "getpid", .errpid = true, }, + { .name = "getpgid", .errpid = true, }, + { .name = "getppid", .errpid = true, }, + { .name = "getrandom", .errmsg = true, + .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "getxattr", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, @@ -1186,8 +753,7 @@ static struct syscall_fmt { [1] = SCA_FILENAME, /* filename */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "perf_event_open", .errmsg = true, - .arg_scnprintf = { [1] = SCA_INT, /* pid */ - [2] = SCA_INT, /* cpu */ + .arg_scnprintf = { [2] = SCA_INT, /* cpu */ [3] = SCA_FD, /* group_fd */ [4] = SCA_PERF_FLAGS, /* flags */ }, }, { .name = "pipe2", .errmsg = true, @@ -1234,6 +800,11 @@ static struct syscall_fmt { .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_tgsigqueueinfo", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, + { .name = "sched_setscheduler", .errmsg = true, + .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, }, + { .name = "seccomp", .errmsg = true, + .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */ + [1] = SCA_SECCOMP_FLAGS, /* flags */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "sendmmsg", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ @@ -1244,7 +815,9 @@ static struct syscall_fmt { { .name = "sendto", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [3] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "setpgid", .errmsg = true, }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "setxattr", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, @@ -1287,6 +860,10 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "vmsplice", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "wait4", .errpid = true, + .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, }, + { .name = "waitid", .errpid = true, + .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, }, { .name = "write", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "writev", .errmsg = true, @@ -1398,59 +975,6 @@ fail: static const size_t trace__entry_str_size = 2048; -struct trace { - struct perf_tool tool; - struct { - int machine; - int open_id; - } audit; - struct { - int max; - struct syscall *table; - struct { - struct perf_evsel *sys_enter, - *sys_exit; - } events; - } syscalls; - struct record_opts opts; - struct perf_evlist *evlist; - struct machine *host; - struct thread *current; - u64 base_time; - FILE *output; - unsigned long nr_events; - struct strlist *ev_qualifier; - struct { - size_t nr; - int *entries; - } ev_qualifier_ids; - struct intlist *tid_list; - struct intlist *pid_list; - struct { - size_t nr; - pid_t *entries; - } filter_pids; - double duration_filter; - double runtime_ms; - struct { - u64 vfs_getname, - proc_getname; - } stats; - bool not_ev_qualifier; - bool live; - bool full_time; - bool sched; - bool multiple_threads; - bool summary; - bool summary_only; - bool show_comm; - bool show_tool_stats; - bool trace_syscalls; - bool force; - bool vfs_getname; - int trace_pgfaults; -}; - static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) { struct thread_trace *ttrace = thread__priv(thread); @@ -1618,6 +1142,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, color_fprintf(trace->output, PERF_COLOR_RED, "LOST %" PRIu64 " events!\n", event->lost.lost); ret = machine__process_lost_event(machine, event, sample); + break; default: ret = machine__process_event(machine, event, sample); break; @@ -1675,6 +1200,10 @@ static int syscall__set_arg_fmts(struct syscall *sc) sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; else if (field->flags & FIELD_IS_POINTER) sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; + else if (strcmp(field->type, "pid_t") == 0) + sc->arg_scnprintf[idx] = SCA_PID; + else if (strcmp(field->type, "umode_t") == 0) + sc->arg_scnprintf[idx] = SCA_MODE_T; ++idx; } @@ -1685,7 +1214,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) { char tp_name[128]; struct syscall *sc; - const char *name = audit_syscall_to_name(id, trace->audit.machine); + const char *name = syscalltbl__name(trace->sctbl, id); if (name == NULL) return -1; @@ -1760,7 +1289,7 @@ static int trace__validate_ev_qualifier(struct trace *trace) strlist__for_each(pos, trace->ev_qualifier) { const char *sc = pos->s; - int id = audit_name_to_syscall(sc, trace->audit.machine); + int id = syscalltbl__id(trace->sctbl, sc); if (id < 0) { if (err == 0) { @@ -1846,7 +1375,12 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, "%ld", val); } } - } else { + } else if (IS_ERR(sc->tp_format)) { + /* + * If we managed to read the tracepoint /format file, then we + * may end up not having any args, like with gettid(), so only + * print the raw args when we didn't manage to read it. + */ int i = 0; while (i < 6) { @@ -1987,7 +1521,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, goto out_put; } - if (!trace->summary_only) + if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) trace__printf_interrupted_entry(trace, sample); ttrace->entry_time = sample->time; @@ -1998,7 +1532,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, args, trace, thread); if (sc->is_exit) { - if (!trace->duration_filter && !trace->summary_only) { + if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); fprintf(trace->output, "%-70s\n", ttrace->entry_str); } @@ -2018,6 +1552,29 @@ out_put: return err; } +static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample, + struct callchain_cursor *cursor) +{ + struct addr_location al; + + if (machine__resolve(trace->host, &al, sample) < 0 || + thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack)) + return -1; + + return 0; +} + +static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) +{ + /* TODO: user-configurable print_opts */ + const unsigned int print_opts = EVSEL__PRINT_SYM | + EVSEL__PRINT_DSO | + EVSEL__PRINT_UNKNOWN_AS_ADDR; + + return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); +} + static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2025,7 +1582,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, long ret; u64 duration = 0; struct thread *thread; - int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; + int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -2042,7 +1599,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); - if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) { + if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) { trace__set_fd_pathname(thread, ret, ttrace->filename.name); ttrace->filename.pending_open = false; ++trace->stats.vfs_getname; @@ -2057,6 +1614,15 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, } else if (trace->duration_filter) goto out; + if (sample->callchain) { + callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); + if (callchain_ret == 0) { + if (callchain_cursor.nr < trace->min_stack) + goto out; + callchain_ret = 1; + } + } + if (trace->summary_only) goto out; @@ -2073,7 +1639,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (sc->fmt == NULL) { signed_print: fprintf(trace->output, ") = %ld", ret); - } else if (ret < 0 && sc->fmt->errmsg) { + } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) { char bf[STRERR_BUFSIZE]; const char *emsg = strerror_r(-ret, bf, sizeof(bf)), *e = audit_errno_to_name(-ret); @@ -2083,10 +1649,24 @@ signed_print: fprintf(trace->output, ") = 0 Timeout"); else if (sc->fmt->hexret) fprintf(trace->output, ") = %#lx", ret); - else + else if (sc->fmt->errpid) { + struct thread *child = machine__find_thread(trace->host, ret, ret); + + if (child != NULL) { + fprintf(trace->output, ") = %ld", ret); + if (child->comm_set) + fprintf(trace->output, " (%s)", thread__comm_str(child)); + thread__put(child); + } + } else goto signed_print; fputc('\n', trace->output); + + if (callchain_ret > 0) + trace__fprintf_callchain(trace, sample); + else if (callchain_ret < 0) + pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); out: ttrace->entry_pending = false; err = 0; @@ -2217,6 +1797,17 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { + int callchain_ret = 0; + + if (sample->callchain) { + callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); + if (callchain_ret == 0) { + if (callchain_cursor.nr < trace->min_stack) + goto out; + callchain_ret = 1; + } + } + trace__printf_interrupted_entry(trace, sample); trace__fprintf_tstamp(trace, sample->time, trace->output); @@ -2234,6 +1825,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, } fprintf(trace->output, ")\n"); + + if (callchain_ret > 0) + trace__fprintf_callchain(trace, sample); + else if (callchain_ret < 0) + pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); +out: return 0; } @@ -2256,17 +1853,27 @@ static void print_location(FILE *f, struct perf_sample *sample, static int trace__pgfault(struct trace *trace, struct perf_evsel *evsel, - union perf_event *event, + union perf_event *event __maybe_unused, struct perf_sample *sample) { struct thread *thread; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct addr_location al; char map_type = 'd'; struct thread_trace *ttrace; int err = -1; + int callchain_ret = 0; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + + if (sample->callchain) { + callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); + if (callchain_ret == 0) { + if (callchain_cursor.nr < trace->min_stack) + goto out_put; + callchain_ret = 1; + } + } + ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) goto out_put; @@ -2279,7 +1886,7 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) goto out; - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); @@ -2292,11 +1899,11 @@ static int trace__pgfault(struct trace *trace, fprintf(trace->output, "] => "); - thread__find_addr_location(thread, cpumode, MAP__VARIABLE, + thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, sample->addr, &al); if (!al.map) { - thread__find_addr_location(thread, cpumode, + thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->addr, &al); if (al.map) @@ -2308,6 +1915,11 @@ static int trace__pgfault(struct trace *trace, print_location(trace->output, sample, &al, true, false); fprintf(trace->output, " (%c%c)\n", map_type, al.level); + + if (callchain_ret > 0) + trace__fprintf_callchain(trace, sample); + else if (callchain_ret < 0) + pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); out: err = 0; out_put: @@ -2327,6 +1939,23 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample) return false; } +static void trace__set_base_time(struct trace *trace, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + /* + * BPF events were not setting PERF_SAMPLE_TIME, so be more robust + * and don't use sample->time unconditionally, we may end up having + * some other event in the future without PERF_SAMPLE_TIME for good + * reason, i.e. we may not be interested in its timestamps, just in + * it taking place, picking some piece of information when it + * appears in our event stream (vfs_getname comes to mind). + */ + if (trace->base_time == 0 && !trace->full_time && + (evsel->attr.sample_type & PERF_SAMPLE_TIME)) + trace->base_time = sample->time; +} + static int trace__process_sample(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -2341,8 +1970,7 @@ static int trace__process_sample(struct perf_tool *tool, if (skip_sample(trace, sample)) return 0; - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample->time; + trace__set_base_time(trace, evsel, sample); if (handler) { ++trace->nr_events; @@ -2451,8 +2079,7 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) return true; } -static int perf_evlist__add_pgfault(struct perf_evlist *evlist, - u64 config) +static struct perf_evsel *perf_evsel__new_pgfault(u64 config) { struct perf_evsel *evsel; struct perf_event_attr attr = { @@ -2466,13 +2093,10 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, event_attr_init(&attr); evsel = perf_evsel__new(&attr); - if (!evsel) - return -ENOMEM; - - evsel->handler = trace__pgfault; - perf_evlist__add(evlist, evsel); + if (evsel) + evsel->handler = trace__pgfault; - return 0; + return evsel; } static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) @@ -2480,9 +2104,6 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st const u32 type = event->header.type; struct perf_evsel *evsel; - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample->time; - if (type != PERF_RECORD_SAMPLE) { trace__process_event(trace, trace->host, event, sample); return; @@ -2494,6 +2115,8 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st return; } + trace__set_base_time(trace, evsel, sample); + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && sample->raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", @@ -2528,6 +2151,15 @@ static int trace__add_syscall_newtp(struct trace *trace) perf_evlist__add(evlist, sys_enter); perf_evlist__add(evlist, sys_exit); + if (callchain_param.enabled && !trace->kernel_syscallchains) { + /* + * We're interested only in the user space callchain + * leading to the syscall, allow overriding that for + * debugging reasons using --kernel_syscall_callchains + */ + sys_exit->attr.exclude_callchain_kernel = 1; + } + trace->syscalls.events.sys_enter = sys_enter; trace->syscalls.events.sys_exit = sys_exit; @@ -2566,7 +2198,7 @@ out_enomem: static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = trace->evlist; - struct perf_evsel *evsel; + struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2580,14 +2212,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->trace_syscalls) trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); - if ((trace->trace_pgfaults & TRACE_PFMAJ) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { - goto out_error_mem; + if ((trace->trace_pgfaults & TRACE_PFMAJ)) { + pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); + if (pgfault_maj == NULL) + goto out_error_mem; + perf_evlist__add(evlist, pgfault_maj); } - if ((trace->trace_pgfaults & TRACE_PFMIN) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) - goto out_error_mem; + if ((trace->trace_pgfaults & TRACE_PFMIN)) { + pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); + if (pgfault_min == NULL) + goto out_error_mem; + perf_evlist__add(evlist, pgfault_min); + } if (trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", @@ -2606,7 +2243,45 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - perf_evlist__config(evlist, &trace->opts); + perf_evlist__config(evlist, &trace->opts, NULL); + + if (callchain_param.enabled) { + bool use_identifier = false; + + if (trace->syscalls.events.sys_exit) { + perf_evsel__config_callchain(trace->syscalls.events.sys_exit, + &trace->opts, &callchain_param); + use_identifier = true; + } + + if (pgfault_maj) { + perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); + use_identifier = true; + } + + if (pgfault_min) { + perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); + use_identifier = true; + } + + if (use_identifier) { + /* + * Now we have evsels with different sample_ids, use + * PERF_SAMPLE_IDENTIFIER to map from sample to evsel + * from a fixed position in each ring buffer record. + * + * As of this the changeset introducing this comment, this + * isn't strictly needed, as the fields that can come before + * PERF_SAMPLE_ID are all used, but we'll probably disable + * some of those for things like copying the payload of + * pointer syscall arguments, and for vfs_getname we don't + * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this + * here as a warning we need to use PERF_SAMPLE_IDENTIFIER. + */ + perf_evlist__set_sample_bit(evlist, IDENTIFIER); + perf_evlist__reset_sample_bit(evlist, ID); + } + } signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); @@ -2884,15 +2559,29 @@ static size_t trace__fprintf_threads_header(FILE *fp) return printed; } +DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, + struct stats *stats; + double msecs; + int syscall; +) +{ + struct int_node *source = rb_entry(nd, struct int_node, rb_node); + struct stats *stats = source->priv; + + entry->syscall = source->i; + entry->stats = stats; + entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; +} + static size_t thread__dump_stats(struct thread_trace *ttrace, struct trace *trace, FILE *fp) { - struct stats *stats; size_t printed = 0; struct syscall *sc; - struct int_node *inode = intlist__first(ttrace->syscall_stats); + struct rb_node *nd; + DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); - if (inode == NULL) + if (syscall_stats == NULL) return 0; printed += fprintf(fp, "\n"); @@ -2901,9 +2590,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); - /* each int_node is a syscall */ - while (inode) { - stats = inode->priv; + resort_rb__for_each(nd, syscall_stats) { + struct stats *stats = syscall_stats_entry->stats; if (stats) { double min = (double)(stats->min) / NSEC_PER_MSEC; double max = (double)(stats->max) / NSEC_PER_MSEC; @@ -2914,34 +2602,23 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; avg /= NSEC_PER_MSEC; - sc = &trace->syscalls.table[inode->i]; + sc = &trace->syscalls.table[syscall_stats_entry->syscall]; printed += fprintf(fp, " %-15s", sc->name); printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", - n, avg * n, min, avg); + n, syscall_stats_entry->msecs, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); } - - inode = intlist__next(inode); } + resort_rb__delete(syscall_stats); printed += fprintf(fp, "\n\n"); return printed; } -/* struct used to pass data to per-thread function */ -struct summary_data { - FILE *fp; - struct trace *trace; - size_t printed; -}; - -static int trace__fprintf_one_thread(struct thread *thread, void *priv) +static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) { - struct summary_data *data = priv; - FILE *fp = data->fp; - size_t printed = data->printed; - struct trace *trace = data->trace; + size_t printed = 0; struct thread_trace *ttrace = thread__priv(thread); double ratio; @@ -2957,25 +2634,45 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); if (ttrace->pfmin) printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); - printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); + if (trace->sched) + printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); + else if (fputc('\n', fp) != EOF) + ++printed; + printed += thread__dump_stats(ttrace, trace, fp); - data->printed += printed; + return printed; +} - return 0; +static unsigned long thread__nr_events(struct thread_trace *ttrace) +{ + return ttrace ? ttrace->nr_events : 0; +} + +DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), + struct thread *thread; +) +{ + entry->thread = rb_entry(nd, struct thread, rb_node); } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - struct summary_data data = { - .fp = fp, - .trace = trace - }; - data.printed = trace__fprintf_threads_header(fp); + DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host); + size_t printed = trace__fprintf_threads_header(fp); + struct rb_node *nd; + + if (threads == NULL) { + fprintf(fp, "%s", "Error sorting output by nr_events!\n"); + return 0; + } - machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); + resort_rb__for_each(nd, threads) + printed += trace__fprintf_thread(fp, threads_entry->thread, trace); - return data.printed; + resort_rb__delete(threads); + + return printed; } static int trace__set_duration(const struct option *opt, const char *str, @@ -3071,10 +2768,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct trace trace = { - .audit = { - .machine = audit_detect_machine(), - .open_id = audit_name_to_syscall("open", trace.audit.machine), - }, .syscalls = { . max = -1, }, @@ -3092,6 +2785,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .output = stderr, .show_comm = true, .trace_syscalls = true, + .kernel_syscallchains = false, + .max_stack = UINT_MAX, }; const char *output_name = NULL; const char *ev_qualifier_str = NULL; @@ -3137,10 +2832,24 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), + OPT_CALLBACK(0, "call-graph", &trace.opts, + "record_mode[,record_size]", record_callchain_help, + &record_parse_callchain_opt), + OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, + "Show the kernel callchains on the syscall exit path"), + OPT_UINTEGER(0, "min-stack", &trace.min_stack, + "Set the minimum stack depth when parsing the callchain, " + "anything below the specified depth will be ignored."), + OPT_UINTEGER(0, "max-stack", &trace.max_stack, + "Set the maximum stack depth when parsing the callchain, " + "anything beyond the specified depth will be ignored. " + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_END() }; + bool __maybe_unused max_stack_user_set = true; + bool mmap_pages_user_set = true; const char * const trace_subcommands[] = { "record", NULL }; int err; char bf[BUFSIZ]; @@ -3149,8 +2858,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) signal(SIGFPE, sighandler_dump_stack); trace.evlist = perf_evlist__new(); + trace.sctbl = syscalltbl__new(); - if (trace.evlist == NULL) { + if (trace.evlist == NULL || trace.sctbl == NULL) { pr_err("Not enough memory to run!\n"); err = -ENOMEM; goto out; @@ -3159,11 +2869,40 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); + err = bpf__setup_stdout(trace.evlist); + if (err) { + bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); + pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); + goto out; + } + + err = -1; + if (trace.trace_pgfaults) { trace.opts.sample_address = true; trace.opts.sample_time = true; } + if (trace.opts.mmap_pages == UINT_MAX) + mmap_pages_user_set = false; + + if (trace.max_stack == UINT_MAX) { + trace.max_stack = sysctl_perf_event_max_stack; + max_stack_user_set = false; + } + +#ifdef HAVE_DWARF_UNWIND_SUPPORT + if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) + record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); +#endif + + if (callchain_param.enabled) { + if (!mmap_pages_user_set && geteuid() == 0) + trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; + + symbol_conf.use_callchain = true; + } + if (trace.evlist->nr_entries > 0) evlist__set_evsel_handler(trace.evlist, trace__event_handler); @@ -3180,6 +2919,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } + if (!trace.trace_syscalls && ev_qualifier_str) { + pr_err("The -e option can't be used with --no-syscalls.\n"); + goto out; + } + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { @@ -3188,6 +2932,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) } } + trace.open_id = syscalltbl__id(trace.sctbl, "open"); + if (ev_qualifier_str != NULL) { const char *s = ev_qualifier_str; struct strlist_config slist_config = { diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 3f871b54e261..41c24010ab43 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -7,38 +7,38 @@ extern const char perf_usage_string[]; extern const char perf_more_info_string[]; -extern void list_common_cmds_help(void); -extern const char *help_unknown_cmd(const char *cmd); -extern void prune_packed_objects(int); -extern int read_line_with_nul(char *buf, int size, FILE *file); -extern int check_pager_config(const char *cmd); +void list_common_cmds_help(void); +const char *help_unknown_cmd(const char *cmd); +void prune_packed_objects(int); +int read_line_with_nul(char *buf, int size, FILE *file); +int check_pager_config(const char *cmd); -extern int cmd_annotate(int argc, const char **argv, const char *prefix); -extern int cmd_bench(int argc, const char **argv, const char *prefix); -extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix); -extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); -extern int cmd_config(int argc, const char **argv, const char *prefix); -extern int cmd_diff(int argc, const char **argv, const char *prefix); -extern int cmd_evlist(int argc, const char **argv, const char *prefix); -extern int cmd_help(int argc, const char **argv, const char *prefix); -extern int cmd_sched(int argc, const char **argv, const char *prefix); -extern int cmd_list(int argc, const char **argv, const char *prefix); -extern int cmd_record(int argc, const char **argv, const char *prefix); -extern int cmd_report(int argc, const char **argv, const char *prefix); -extern int cmd_stat(int argc, const char **argv, const char *prefix); -extern int cmd_timechart(int argc, const char **argv, const char *prefix); -extern int cmd_top(int argc, const char **argv, const char *prefix); -extern int cmd_script(int argc, const char **argv, const char *prefix); -extern int cmd_version(int argc, const char **argv, const char *prefix); -extern int cmd_probe(int argc, const char **argv, const char *prefix); -extern int cmd_kmem(int argc, const char **argv, const char *prefix); -extern int cmd_lock(int argc, const char **argv, const char *prefix); -extern int cmd_kvm(int argc, const char **argv, const char *prefix); -extern int cmd_test(int argc, const char **argv, const char *prefix); -extern int cmd_trace(int argc, const char **argv, const char *prefix); -extern int cmd_inject(int argc, const char **argv, const char *prefix); -extern int cmd_mem(int argc, const char **argv, const char *prefix); -extern int cmd_data(int argc, const char **argv, const char *prefix); +int cmd_annotate(int argc, const char **argv, const char *prefix); +int cmd_bench(int argc, const char **argv, const char *prefix); +int cmd_buildid_cache(int argc, const char **argv, const char *prefix); +int cmd_buildid_list(int argc, const char **argv, const char *prefix); +int cmd_config(int argc, const char **argv, const char *prefix); +int cmd_diff(int argc, const char **argv, const char *prefix); +int cmd_evlist(int argc, const char **argv, const char *prefix); +int cmd_help(int argc, const char **argv, const char *prefix); +int cmd_sched(int argc, const char **argv, const char *prefix); +int cmd_list(int argc, const char **argv, const char *prefix); +int cmd_record(int argc, const char **argv, const char *prefix); +int cmd_report(int argc, const char **argv, const char *prefix); +int cmd_stat(int argc, const char **argv, const char *prefix); +int cmd_timechart(int argc, const char **argv, const char *prefix); +int cmd_top(int argc, const char **argv, const char *prefix); +int cmd_script(int argc, const char **argv, const char *prefix); +int cmd_version(int argc, const char **argv, const char *prefix); +int cmd_probe(int argc, const char **argv, const char *prefix); +int cmd_kmem(int argc, const char **argv, const char *prefix); +int cmd_lock(int argc, const char **argv, const char *prefix); +int cmd_kvm(int argc, const char **argv, const char *prefix); +int cmd_test(int argc, const char **argv, const char *prefix); +int cmd_trace(int argc, const char **argv, const char *prefix); +int cmd_inject(int argc, const char **argv, const char *prefix); +int cmd_mem(int argc, const char **argv, const char *prefix); +int cmd_data(int argc, const char **argv, const char *prefix); -extern int find_scripts(char **scripts_array, char **scripts_path_array); +int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index eca6a912e8c2..5ad0255f8756 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -23,11 +23,17 @@ $(call detected_var,ARCH) NO_PERF_REGS := 1 +# Additional ARCH settings for ppc +ifeq ($(ARCH),powerpc) + NO_PERF_REGS := 0 + LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 +endif + # Additional ARCH settings for x86 ifeq ($(ARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) - CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT + CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 $(call detected,CONFIG_X86_64) @@ -109,7 +115,7 @@ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET endif -include $(src-perf)/config/utilities.mak +include $(srctree)/tools/scripts/utilities.mak ifeq ($(call get-executable,$(FLEX)),) dummy := $(error Error: $(FLEX) is missing on this system, please install it) @@ -268,6 +274,12 @@ else ifneq ($(feature-dwarf), 1) msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 + else + ifneq ($(feature-dwarf_getlocations), 1) + msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157); + else + CFLAGS += -DHAVE_DWARF_GETLOCATIONS + endif # dwarf_getlocations endif # Dwarf support endif # libelf support endif # NO_LIBELF @@ -289,9 +301,6 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT endif - # include ARCH specific config - -include $(src-perf)/arch/$(ARCH)/Makefile - ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 6461e02ab940..3573f315f955 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -92,6 +92,22 @@ error: return ret; } +static int use_arch_timestamp; + +static inline uint64_t +get_arch_timestamp(void) +{ +#if defined(__i386__) || defined(__x86_64__) + unsigned int low, high; + + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((uint64_t)high) << 32; +#else + return 0; +#endif +} + #define NSEC_PER_SEC 1000000000 static int perf_clk_id = CLOCK_MONOTONIC; @@ -107,6 +123,9 @@ perf_get_timestamp(void) struct timespec ts; int ret; + if (use_arch_timestamp) + return get_arch_timestamp(); + ret = clock_gettime(perf_clk_id, &ts); if (ret) return 0; @@ -203,6 +222,17 @@ perf_close_marker_file(void) munmap(marker_addr, pgsz); } +static void +init_arch_timestamp(void) +{ + char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP"); + + if (!str || !*str || !strcmp(str, "0")) + return; + + use_arch_timestamp = 1; +} + void *jvmti_open(void) { int pad_cnt; @@ -211,11 +241,17 @@ void *jvmti_open(void) int fd; FILE *fp; + init_arch_timestamp(); + /* * check if clockid is supported */ - if (!perf_get_timestamp()) - warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + if (!perf_get_timestamp()) { + if (use_arch_timestamp) + warnx("jvmti: arch timestamp not supported"); + else + warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + } memset(&header, 0, sizeof(header)); @@ -263,6 +299,9 @@ void *jvmti_open(void) header.timestamp = perf_get_timestamp(); + if (use_arch_timestamp) + header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP; + if (!fwrite(&header, sizeof(header), 1, fp)) { warn("jvmti: cannot write dumpfile header"); goto error; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index aaee0a782747..797000842d40 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -17,6 +17,7 @@ #include <subcmd/parse-options.h> #include "util/bpf-loader.h" #include "util/debug.h" +#include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <pthread.h> #include <stdlib.h> @@ -308,9 +309,11 @@ static int handle_alias(int *argcp, const char ***argv) if (*argcp > 1) { struct strbuf buf; - strbuf_init(&buf, PATH_MAX); - strbuf_addstr(&buf, alias_string); - sq_quote_argv(&buf, (*argv) + 1, PATH_MAX); + if (strbuf_init(&buf, PATH_MAX) < 0 || + strbuf_addstr(&buf, alias_string) < 0 || + sq_quote_argv(&buf, (*argv) + 1, + PATH_MAX) < 0) + die("Failed to allocate memory."); free(alias_string); alias_string = buf.buf; } @@ -533,6 +536,7 @@ int main(int argc, const char **argv) { const char *cmd; char sbuf[STRERR_BUFSIZE]; + int value; /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); @@ -542,6 +546,9 @@ int main(int argc, const char **argv) page_size = sysconf(_SC_PAGE_SIZE); cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) + sysctl_perf_event_max_stack = value; + cmd = extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; @@ -549,6 +556,7 @@ int main(int argc, const char **argv) srandom(time(NULL)); perf_config(perf_default_config, NULL); + set_buildid_dir(NULL); /* get debugfs/tracefs mount point from /proc/mounts */ tracing_path_mount(); @@ -572,7 +580,6 @@ int main(int argc, const char **argv) } if (!prefixcmp(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT - set_buildid_dir(NULL); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); @@ -587,7 +594,6 @@ int main(int argc, const char **argv) argc--; handle_options(&argv, &argc, NULL); commit_pager_choice(); - set_buildid_dir(NULL); if (argc > 0) { if (!prefixcmp(argv[0], "--")) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 5381a01c0610..cd8f1b150f9e 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,7 +52,6 @@ struct record_opts { bool sample_weight; bool sample_time; bool sample_time_set; - bool callgraph_set; bool period; bool running_time; bool full_auxtrace; diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 1b02cdc0cab6..7656ff8aa066 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -34,10 +34,9 @@ import datetime # # ubuntu: # -# $ sudo apt-get install postgresql +# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql # $ sudo su - postgres -# $ createuser <your user id here> -# Shall the new role be a superuser? (y/n) y +# $ createuser -s <your user id here> # # An example of using this script with Intel PT: # @@ -224,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ perf_db_export_mode = True perf_db_export_calls = False +perf_db_export_callchains = False + def usage(): - print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]" + print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]" print >> sys.stderr, "where: columns 'all' or 'branches'" - print >> sys.stderr, " calls 'calls' => create calls table" + print >> sys.stderr, " calls 'calls' => create calls and call_paths table" + print >> sys.stderr, " callchains 'callchains' => create call_paths table" raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -246,9 +248,11 @@ if columns not in ("all", "branches"): branches = (columns == "branches") -if (len(sys.argv) >= 4): - if (sys.argv[3] == "calls"): +for i in range(3,len(sys.argv)): + if (sys.argv[i] == "calls"): perf_db_export_calls = True + elif (sys.argv[i] == "callchains"): + perf_db_export_callchains = True else: usage() @@ -359,14 +363,16 @@ else: 'transaction bigint,' 'data_src bigint,' 'branch_type integer,' - 'in_tx boolean)') + 'in_tx boolean,' + 'call_path_id bigint)') -if perf_db_export_calls: +if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' 'id bigint NOT NULL,' 'parent_id bigint,' 'symbol_id bigint,' 'ip bigint)') +if perf_db_export_calls: do_query(query, 'CREATE TABLE calls (' 'id bigint NOT NULL,' 'thread_id bigint,' @@ -428,7 +434,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS ' '(SELECT tid FROM threads WHERE id = thread_id) AS tid' ' FROM comm_threads') -if perf_db_export_calls: +if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE VIEW call_paths_view AS ' 'SELECT ' 'c.id,' @@ -444,6 +450,7 @@ if perf_db_export_calls: '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,' '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name' ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id') +if perf_db_export_calls: do_query(query, 'CREATE VIEW calls_view AS ' 'SELECT ' 'calls.id,' @@ -541,8 +548,9 @@ dso_file = open_output_file("dso_table.bin") symbol_file = open_output_file("symbol_table.bin") branch_type_file = open_output_file("branch_type_table.bin") sample_file = open_output_file("sample_table.bin") -if perf_db_export_calls: +if perf_db_export_calls or perf_db_export_callchains: call_path_file = open_output_file("call_path_table.bin") +if perf_db_export_calls: call_file = open_output_file("call_table.bin") def trace_begin(): @@ -554,8 +562,8 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - if perf_db_export_calls: + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + if perf_db_export_calls or perf_db_export_callchains: call_path_table(0, 0, 0, 0) unhandled_count = 0 @@ -571,8 +579,9 @@ def trace_end(): copy_output_file(symbol_file, "symbols") copy_output_file(branch_type_file, "branch_types") copy_output_file(sample_file, "samples") - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: copy_output_file(call_path_file, "call_paths") + if perf_db_export_calls: copy_output_file(call_file, "calls") print datetime.datetime.today(), "Removing intermediate files..." @@ -585,8 +594,9 @@ def trace_end(): remove_output_file(symbol_file) remove_output_file(branch_type_file) remove_output_file(sample_file) - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: remove_output_file(call_path_file) + if perf_db_export_calls: remove_output_file(call_file) os.rmdir(output_dir_name) print datetime.datetime.today(), "Adding primary keys" @@ -599,8 +609,9 @@ def trace_end(): do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)') - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)') + if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') print datetime.datetime.today(), "Adding foreign keys" @@ -623,10 +634,11 @@ def trace_end(): 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),' 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),' 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)') - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'ALTER TABLE call_paths ' 'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),' 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)') + if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ' 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),' 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' @@ -694,11 +706,11 @@ def branch_type_table(branch_type, name, *x): value = struct.pack(fmt, 2, 4, branch_type, n, name) branch_type_file.write(value) -def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x): +def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x): if branches: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id) else: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id) sample_file.write(value) def call_path_table(cp_id, parent_id, symbol_id, ip, *x): diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1ba628ed049a..66a28982547b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -37,6 +37,8 @@ perf-y += topology.o perf-y += cpumap.o perf-y += stat.o perf-y += event_update.o +perf-y += event-times.o +perf-y += backward-ring-buffer.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c new file mode 100644 index 000000000000..d9ba991a9a30 --- /dev/null +++ b/tools/perf/tests/backward-ring-buffer.c @@ -0,0 +1,151 @@ +/* + * Test backward bit in event attribute, read ring buffer from end to + * beginning + */ + +#include <perf.h> +#include <evlist.h> +#include <sys/prctl.h> +#include "tests.h" +#include "debug.h" + +#define NR_ITERS 111 + +static void testcase(void) +{ + int i; + + for (i = 0; i < NR_ITERS; i++) { + char proc_name[10]; + + snprintf(proc_name, sizeof(proc_name), "p:%d\n", i); + prctl(PR_SET_NAME, proc_name); + } +} + +static int count_samples(struct perf_evlist *evlist, int *sample_count, + int *comm_count) +{ + int i; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + perf_evlist__mmap_read_catchup(evlist, i); + while ((event = perf_evlist__mmap_read_backward(evlist, i)) != NULL) { + const u32 type = event->header.type; + + switch (type) { + case PERF_RECORD_SAMPLE: + (*sample_count)++; + break; + case PERF_RECORD_COMM: + (*comm_count)++; + break; + default: + pr_err("Unexpected record of type %d\n", type); + return TEST_FAIL; + } + } + } + return TEST_OK; +} + +static int do_test(struct perf_evlist *evlist, int mmap_pages, + int *sample_count, int *comm_count) +{ + int err; + char sbuf[STRERR_BUFSIZE]; + + err = perf_evlist__mmap(evlist, mmap_pages, true); + if (err < 0) { + pr_debug("perf_evlist__mmap: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + return TEST_FAIL; + } + + perf_evlist__enable(evlist); + testcase(); + perf_evlist__disable(evlist); + + err = count_samples(evlist, sample_count, comm_count); + perf_evlist__munmap(evlist); + return err; +} + + +int test__backward_ring_buffer(int subtest __maybe_unused) +{ + int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0; + char pid[16], sbuf[STRERR_BUFSIZE]; + struct perf_evlist *evlist; + struct perf_evsel *evsel __maybe_unused; + struct parse_events_error parse_error; + struct record_opts opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .freq = 0, + .mmap_pages = 256, + .default_interval = 1, + }; + + snprintf(pid, sizeof(pid), "%d", getpid()); + pid[sizeof(pid) - 1] = '\0'; + opts.target.tid = opts.target.pid = pid; + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("No ehough memory to create evlist\n"); + return TEST_FAIL; + } + + err = perf_evlist__create_maps(evlist, &opts.target); + if (err < 0) { + pr_debug("Not enough memory to create thread/cpu maps\n"); + goto out_delete_evlist; + } + + bzero(&parse_error, sizeof(parse_error)); + err = parse_events(evlist, "syscalls:sys_enter_prctl", &parse_error); + if (err) { + pr_debug("Failed to parse tracepoint event, try use root\n"); + ret = TEST_SKIP; + goto out_delete_evlist; + } + + perf_evlist__config(evlist, &opts, NULL); + + /* Set backward bit, ring buffer should be writing from end */ + evlist__for_each(evlist, evsel) + evsel->attr.write_backward = 1; + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + goto out_delete_evlist; + } + + ret = TEST_FAIL; + err = do_test(evlist, opts.mmap_pages, &sample_count, + &comm_count); + if (err != TEST_OK) + goto out_delete_evlist; + + if ((sample_count != NR_ITERS) || (comm_count != NR_ITERS)) { + pr_err("Unexpected counter: sample_count=%d, comm_count=%d\n", + sample_count, comm_count); + goto out_delete_evlist; + } + + err = do_test(evlist, 1, &sample_count, &comm_count); + if (err != TEST_OK) + goto out_delete_evlist; + + ret = TEST_OK; +out_delete_evlist: + perf_evlist__delete(evlist); + return ret; +} diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 199501c71e27..f31eed31c1a9 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -138,7 +138,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), perf_evlist__splice_list_tail(evlist, &parse_evlist.list); evlist->nr_groups = parse_evlist.nr_groups; - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); err = perf_evlist__open(evlist); if (err < 0) { diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index f2b1dcac45d3..0e95c20ecf6e 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -204,6 +204,14 @@ static struct test generic_tests[] = { .func = test__event_update, }, { + .desc = "Test events times", + .func = test__event_times, + }, + { + .desc = "Test backward reading from ring buffer", + .func = test__backward_ring_buffer, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index afc9ad0a0515..68a69a195545 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -293,7 +293,6 @@ static int process_sample_event(struct machine *machine, { struct perf_sample sample; struct thread *thread; - u8 cpumode; int ret; if (perf_evlist__parse_sample(evlist, event, &sample)) { @@ -307,9 +306,7 @@ static int process_sample_event(struct machine *machine, return -1; } - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - ret = read_object_code(sample.ip, READLEN, cpumode, thread, state); + ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state); thread__put(thread); return ret; } @@ -535,7 +532,7 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); evsel = perf_evlist__first(evlist); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index dc673ff7c437..8cf0d9e189a8 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -202,7 +202,7 @@ static int dsos__create(int cnt, int size) { int i; - dsos = malloc(sizeof(dsos) * cnt); + dsos = malloc(sizeof(*dsos) * cnt); TEST_ASSERT_VAL("failed to alloc dsos array", dsos); for (i = 0; i < cnt; i++) { diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 1c5c0221cea2..8f6eb853aaf7 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -20,10 +20,10 @@ static int mmap_handler(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_mmap2_event(machine, event, NULL); + return machine__process_mmap2_event(machine, event, sample); } static int init_live_machine(struct machine *machine) diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c new file mode 100644 index 000000000000..95fb744f6628 --- /dev/null +++ b/tools/perf/tests/event-times.c @@ -0,0 +1,236 @@ +#include <linux/compiler.h> +#include <string.h> +#include "tests.h" +#include "evlist.h" +#include "evsel.h" +#include "util.h" +#include "debug.h" +#include "thread_map.h" +#include "target.h" + +static int attach__enable_on_exec(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct target target = { + .uid = UINT_MAX, + }; + const char *argv[] = { "true", NULL, }; + char sbuf[STRERR_BUFSIZE]; + int err; + + pr_debug("attaching to spawned child, enable on exec\n"); + + err = perf_evlist__create_maps(evlist, &target); + if (err < 0) { + pr_debug("Not enough memory to create thread/cpu maps\n"); + return err; + } + + err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL); + if (err < 0) { + pr_debug("Couldn't run the workload!\n"); + return err; + } + + evsel->attr.enable_on_exec = 1; + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + return err; + } + + return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL; +} + +static int detach__enable_on_exec(struct perf_evlist *evlist) +{ + waitpid(evlist->workload.pid, NULL, 0); + return 0; +} + +static int attach__current_disabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct thread_map *threads; + int err; + + pr_debug("attaching to current thread as disabled\n"); + + threads = thread_map__new(-1, getpid(), UINT_MAX); + if (threads == NULL) { + pr_debug("thread_map__new\n"); + return -1; + } + + evsel->attr.disabled = 1; + + err = perf_evsel__open_per_thread(evsel, threads); + if (err) { + pr_debug("Failed to open event cpu-clock:u\n"); + return err; + } + + thread_map__put(threads); + return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL; +} + +static int attach__current_enabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct thread_map *threads; + int err; + + pr_debug("attaching to current thread as enabled\n"); + + threads = thread_map__new(-1, getpid(), UINT_MAX); + if (threads == NULL) { + pr_debug("failed to call thread_map__new\n"); + return -1; + } + + err = perf_evsel__open_per_thread(evsel, threads); + + thread_map__put(threads); + return err == 0 ? TEST_OK : TEST_FAIL; +} + +static int detach__disable(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + + return perf_evsel__enable(evsel); +} + +static int attach__cpu_disabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct cpu_map *cpus; + int err; + + pr_debug("attaching to CPU 0 as enabled\n"); + + cpus = cpu_map__new("0"); + if (cpus == NULL) { + pr_debug("failed to call cpu_map__new\n"); + return -1; + } + + evsel->attr.disabled = 1; + + err = perf_evsel__open_per_cpu(evsel, cpus); + if (err) { + if (err == -EACCES) + return TEST_SKIP; + + pr_debug("Failed to open event cpu-clock:u\n"); + return err; + } + + cpu_map__put(cpus); + return perf_evsel__enable(evsel); +} + +static int attach__cpu_enabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct cpu_map *cpus; + int err; + + pr_debug("attaching to CPU 0 as enabled\n"); + + cpus = cpu_map__new("0"); + if (cpus == NULL) { + pr_debug("failed to call cpu_map__new\n"); + return -1; + } + + err = perf_evsel__open_per_cpu(evsel, cpus); + if (err == -EACCES) + return TEST_SKIP; + + cpu_map__put(cpus); + return err ? TEST_FAIL : TEST_OK; +} + +static int test_times(int (attach)(struct perf_evlist *), + int (detach)(struct perf_evlist *)) +{ + struct perf_counts_values count; + struct perf_evlist *evlist = NULL; + struct perf_evsel *evsel; + int err = -1, i; + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("failed to create event list\n"); + goto out_err; + } + + err = parse_events(evlist, "cpu-clock:u", NULL); + if (err) { + pr_debug("failed to parse event cpu-clock:u\n"); + goto out_err; + } + + evsel = perf_evlist__last(evlist); + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + + err = attach(evlist); + if (err == TEST_SKIP) { + pr_debug(" SKIP : not enough rights\n"); + return err; + } + + TEST_ASSERT_VAL("failed to attach", !err); + + for (i = 0; i < 100000000; i++) { } + + TEST_ASSERT_VAL("failed to detach", !detach(evlist)); + + perf_evsel__read(evsel, 0, 0, &count); + + err = !(count.ena == count.run); + + pr_debug(" %s: ena %" PRIu64", run %" PRIu64"\n", + !err ? "OK " : "FAILED", + count.ena, count.run); + +out_err: + if (evlist) + perf_evlist__delete(evlist); + return !err ? TEST_OK : TEST_FAIL; +} + +/* + * This test creates software event 'cpu-clock' + * attaches it in several ways (explained below) + * and checks that enabled and running times + * match. + */ +int test__event_times(int subtest __maybe_unused) +{ + int err, ret = 0; + +#define _T(attach, detach) \ + err = test_times(attach, detach); \ + if (err && (ret == TEST_OK || ret == TEST_SKIP)) \ + ret = err; + + /* attach on newly spawned process after exec */ + _T(attach__enable_on_exec, detach__enable_on_exec) + /* attach on current process as enabled */ + _T(attach__current_enabled, detach__disable) + /* attach on current process as disabled */ + _T(attach__current_disabled, detach__disable) + /* attach on cpu as disabled */ + _T(attach__cpu_disabled, detach__disable) + /* attach on cpu as enabled */ + _T(attach__cpu_enabled, detach__disable) + +#undef _T + return ret; +} diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 012eab5d1df1..63ecf21750eb 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -30,7 +30,7 @@ static int process_event_scale(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE); - TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123); + TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123); return 0; } diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 071a8b5f5232..6b21746d6eec 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -100,9 +100,11 @@ struct machine *setup_fake_machine(struct machines *machines) } for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { + struct perf_sample sample = { + .cpumode = PERF_RECORD_MISC_USER, + }; union perf_event fake_mmap_event = { .mmap = { - .header = { .misc = PERF_RECORD_MISC_USER, }, .pid = fake_mmap_info[i].pid, .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, @@ -114,7 +116,7 @@ struct machine *setup_fake_machine(struct machines *machines) strcpy(fake_mmap_event.mmap.filename, fake_mmap_info[i].filename); - machine__process_mmap_event(machine, &fake_mmap_event, NULL); + machine__process_mmap_event(machine, &fake_mmap_event, &sample); } for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) { @@ -159,7 +161,7 @@ void print_hists_in(struct hists *hists) struct rb_root *root; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index ecf136c385d5..a9e3db3afac4 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -81,11 +81,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) size_t i; for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -97,16 +92,16 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) else iter.ops = &hist_iter_normal; + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; sample.callchain = (struct ip_callchain *)fake_callchains[i]; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; @@ -131,7 +126,7 @@ static void del_hist_entries(struct hists *hists) struct rb_root *root_out; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 34b945a55d4d..e846f8c42013 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -58,11 +58,6 @@ static int add_hist_entries(struct perf_evlist *evlist, */ evlist__for_each(evlist, evsel) { for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -76,17 +71,17 @@ static int add_hist_entries(struct perf_evlist *evlist, hists->dso_filter = NULL; hists->symbol_filter_str = NULL; + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; al.socket = fake_samples[i].socket; if (hist_entry_iter__add(&iter, &al, - PERF_MAX_STACK_DEPTH, NULL) < 0) { + sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; } diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 64b257d8d557..acf5a1301c07 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -76,17 +76,12 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) struct hists *hists = evsel__hists(evsel); for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; - + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_common_samples[k].pid; sample.tid = fake_common_samples[k].pid; sample.ip = fake_common_samples[k].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + + if (machine__resolve(machine, &al, &sample) < 0) goto out; he = __hists__add_entry(hists, &al, NULL, @@ -102,17 +97,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) } for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; - sample.pid = fake_samples[i][k].pid; sample.tid = fake_samples[i][k].pid; sample.ip = fake_samples[i][k].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; he = __hists__add_entry(hists, &al, NULL, @@ -157,7 +145,7 @@ static int __validate_match(struct hists *hists) /* * Only entries from fake_common_samples should have a pair. */ - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -209,7 +197,7 @@ static int __validate_link(struct hists *hists, int idx) * and some entries will have no pair. However every entry * in other hists should have (dummy) pair. */ - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 23cce67c7e48..63c5efaba1b5 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -51,11 +51,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) size_t i; for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -63,16 +58,16 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) .hide_unresolved = false, }; + sample.cpumode = PERF_RECORD_MISC_USER; sample.cpu = fake_samples[i].cpu; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; @@ -97,7 +92,7 @@ static void del_hist_entries(struct hists *hists) struct rb_root *root_out; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index ddb78fae064a..614e45a3c603 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -80,7 +80,7 @@ int test__keep_tracking(int subtest __maybe_unused) CHECK__(parse_events(evlist, "dummy:u", NULL)); CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); evsel = perf_evlist__first(evlist); diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 53c2273e8859..ad1cb63139a7 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -73,7 +73,7 @@ int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused) } /* - * Here we need to explicitely preallocate the counts, as if + * Here we need to explicitly preallocate the counts, as if * we use the auto allocation it will allocate just for 1 cpu, * as we start by cpu 0. */ diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index eb99a105f31c..4344fe482c1d 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -44,7 +44,7 @@ int test__syscall_openat_tp_fields(int subtest __maybe_unused) goto out_delete_evlist; } - perf_evsel__config(evsel, &opts); + perf_evsel__config(evsel, &opts, NULL); thread_map__set_pid(evlist->threads, 0, getpid()); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 1cc78cefe399..b836ee6a8d9b 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -99,7 +99,7 @@ int test__PERF_RECORD(int subtest __maybe_unused) perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, TID); perf_evsel__set_sample_bit(evsel, TIME); - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); if (err < 0) { diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg index 238aa3927c71..f2d9c5fe58e0 100755 --- a/tools/perf/tests/perf-targz-src-pkg +++ b/tools/perf/tests/perf-targz-src-pkg @@ -15,7 +15,7 @@ TMP_DEST=$(mktemp -d) tar xf ${TARBALL} -C $TMP_DEST rm -f ${TARBALL} cd - > /dev/null -make -C $TMP_DEST/perf*/tools/perf > /dev/null 2>&1 +make -C $TMP_DEST/perf*/tools/perf > /dev/null RC=$? rm -rf ${TMP_DEST} exit $RC diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index ebd80168d51e..39a689bf7574 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -417,7 +417,7 @@ int test__switch_tracking(int subtest __maybe_unused) perf_evsel__set_sample_bit(tracking_evsel, TIME); /* Config events */ - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); /* Check moved event is still at the front */ if (cycles_evsel != perf_evlist__first(evlist)) { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 82b2b5e6ba7c..c57e72c826d2 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -85,6 +85,8 @@ int test__synthesize_stat_config(int subtest); int test__synthesize_stat(int subtest); int test__synthesize_stat_round(int subtest); int test__event_update(int subtest); +int test__event_times(int subtest); +int test__backward_ring_buffer(int subtest); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 630b0b409b97..e63abab7d5a1 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -54,8 +54,14 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) * Step 3: * * Load and split /proc/kallsyms into multiple maps, one per module. + * Do not use kcore, as this test was designed before kcore support + * and has parts that only make sense if using the non-kcore code. + * XXX: extend it to stress the kcorre code as well, hint: the list + * of modules extracted from /proc/kcore, in its current form, can't + * be compacted against the list of modules found in the "vmlinux" + * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) { + if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) { pr_debug("dso__load_kallsyms "); goto out; } @@ -157,6 +163,9 @@ next_pair: pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", mem_start, sym->name, pair->name); + } else { + pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + mem_start, sym->name, first_pair->name); } } } else diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c new file mode 100644 index 000000000000..d64f4a9128a1 --- /dev/null +++ b/tools/perf/trace/beauty/eventfd.c @@ -0,0 +1,38 @@ +#include <sys/eventfd.h> + +#ifndef EFD_SEMAPHORE +#define EFD_SEMAPHORE 1 +#endif + +#ifndef EFD_NONBLOCK +#define EFD_NONBLOCK 00004000 +#endif + +#ifndef EFD_CLOEXEC +#define EFD_CLOEXEC 02000000 +#endif + +static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "NONE"); +#define P_FLAG(n) \ + if (flags & EFD_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~EFD_##n; \ + } + + P_FLAG(SEMAPHORE); + P_FLAG(CLOEXEC); + P_FLAG(NONBLOCK); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c new file mode 100644 index 000000000000..021bb48c6336 --- /dev/null +++ b/tools/perf/trace/beauty/flock.c @@ -0,0 +1,31 @@ + +static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, op = arg->val; + + if (op == 0) + return scnprintf(bf, size, "NONE"); +#define P_CMD(cmd) \ + if ((op & LOCK_##cmd) == LOCK_##cmd) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ + op &= ~LOCK_##cmd; \ + } + + P_CMD(SH); + P_CMD(EX); + P_CMD(NB); + P_CMD(UN); + P_CMD(MAND); + P_CMD(RW); + P_CMD(READ); + P_CMD(WRITE); +#undef P_OP + + if (op) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); + + return printed; +} + +#define SCA_FLOCK syscall_arg__scnprintf_flock diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c new file mode 100644 index 000000000000..e2476211f22d --- /dev/null +++ b/tools/perf/trace/beauty/futex_op.c @@ -0,0 +1,44 @@ +#include <linux/futex.h> + +static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) +{ + enum syscall_futex_args { + SCF_UADDR = (1 << 0), + SCF_OP = (1 << 1), + SCF_VAL = (1 << 2), + SCF_TIMEOUT = (1 << 3), + SCF_UADDR2 = (1 << 4), + SCF_VAL3 = (1 << 5), + }; + int op = arg->val; + int cmd = op & FUTEX_CMD_MASK; + size_t printed = 0; + + switch (cmd) { +#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); + P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; + P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; + P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; + P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; + P_FUTEX_OP(WAKE_OP); break; + P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; + P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; + P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; + P_FUTEX_OP(WAIT_REQUEUE_PI); break; + default: printed = scnprintf(bf, size, "%#x", cmd); break; + } + + if (op & FUTEX_PRIVATE_FLAG) + printed += scnprintf(bf + printed, size - printed, "|PRIV"); + + if (op & FUTEX_CLOCK_REALTIME) + printed += scnprintf(bf + printed, size - printed, "|CLKRT"); + + return printed; +} + +#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c new file mode 100644 index 000000000000..3444a4d5382d --- /dev/null +++ b/tools/perf/trace/beauty/mmap.c @@ -0,0 +1,158 @@ +#include <sys/mman.h> + +static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, prot = arg->val; + + if (prot == PROT_NONE) + return scnprintf(bf, size, "NONE"); +#define P_MMAP_PROT(n) \ + if (prot & PROT_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + prot &= ~PROT_##n; \ + } + + P_MMAP_PROT(EXEC); + P_MMAP_PROT(READ); + P_MMAP_PROT(WRITE); +#ifdef PROT_SEM + P_MMAP_PROT(SEM); +#endif + P_MMAP_PROT(GROWSDOWN); + P_MMAP_PROT(GROWSUP); +#undef P_MMAP_PROT + + if (prot) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); + + return printed; +} + +#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot + +#ifndef MAP_STACK +# define MAP_STACK 0x20000 +#endif + +static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_MMAP_FLAG(n) \ + if (flags & MAP_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MAP_##n; \ + } + + P_MMAP_FLAG(SHARED); + P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT + P_MMAP_FLAG(32BIT); +#endif + P_MMAP_FLAG(ANONYMOUS); + P_MMAP_FLAG(DENYWRITE); + P_MMAP_FLAG(EXECUTABLE); + P_MMAP_FLAG(FILE); + P_MMAP_FLAG(FIXED); + P_MMAP_FLAG(GROWSDOWN); +#ifdef MAP_HUGETLB + P_MMAP_FLAG(HUGETLB); +#endif + P_MMAP_FLAG(LOCKED); + P_MMAP_FLAG(NONBLOCK); + P_MMAP_FLAG(NORESERVE); + P_MMAP_FLAG(POPULATE); + P_MMAP_FLAG(STACK); +#ifdef MAP_UNINITIALIZED + P_MMAP_FLAG(UNINITIALIZED); +#endif +#undef P_MMAP_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags + +static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_MREMAP_FLAG(n) \ + if (flags & MREMAP_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MREMAP_##n; \ + } + + P_MREMAP_FLAG(MAYMOVE); +#ifdef MREMAP_FIXED + P_MREMAP_FLAG(FIXED); +#endif +#undef P_MREMAP_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags + +#ifndef MADV_HWPOISON +#define MADV_HWPOISON 100 +#endif + +#ifndef MADV_MERGEABLE +#define MADV_MERGEABLE 12 +#endif + +#ifndef MADV_UNMERGEABLE +#define MADV_UNMERGEABLE 13 +#endif + +static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, + struct syscall_arg *arg) +{ + int behavior = arg->val; + + switch (behavior) { +#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) + P_MADV_BHV(NORMAL); + P_MADV_BHV(RANDOM); + P_MADV_BHV(SEQUENTIAL); + P_MADV_BHV(WILLNEED); + P_MADV_BHV(DONTNEED); + P_MADV_BHV(REMOVE); + P_MADV_BHV(DONTFORK); + P_MADV_BHV(DOFORK); + P_MADV_BHV(HWPOISON); +#ifdef MADV_SOFT_OFFLINE + P_MADV_BHV(SOFT_OFFLINE); +#endif + P_MADV_BHV(MERGEABLE); + P_MADV_BHV(UNMERGEABLE); +#ifdef MADV_HUGEPAGE + P_MADV_BHV(HUGEPAGE); +#endif +#ifdef MADV_NOHUGEPAGE + P_MADV_BHV(NOHUGEPAGE); +#endif +#ifdef MADV_DONTDUMP + P_MADV_BHV(DONTDUMP); +#endif +#ifdef MADV_DODUMP + P_MADV_BHV(DODUMP); +#endif +#undef P_MADV_PHV + default: break; + } + + return scnprintf(bf, size, "%#x", behavior); +} + +#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c new file mode 100644 index 000000000000..930d8fef2400 --- /dev/null +++ b/tools/perf/trace/beauty/mode_t.c @@ -0,0 +1,68 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +/* From include/linux/stat.h */ +#ifndef S_IRWXUGO +#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) +#endif +#ifndef S_IALLUGO +#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) +#endif +#ifndef S_IRUGO +#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH) +#endif +#ifndef S_IWUGO +#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) +#endif +#ifndef S_IXUGO +#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) +#endif + +static size_t syscall_arg__scnprintf_mode_t(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, mode = arg->val; + +#define P_MODE(n) \ + if ((mode & S_##n) == S_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + mode &= ~S_##n; \ + } + + P_MODE(IALLUGO); + P_MODE(IRWXUGO); + P_MODE(IRUGO); + P_MODE(IWUGO); + P_MODE(IXUGO); + P_MODE(IFMT); + P_MODE(IFSOCK); + P_MODE(IFLNK); + P_MODE(IFREG); + P_MODE(IFBLK); + P_MODE(IFDIR); + P_MODE(IFCHR); + P_MODE(IFIFO); + P_MODE(ISUID); + P_MODE(ISGID); + P_MODE(ISVTX); + P_MODE(IRWXU); + P_MODE(IRUSR); + P_MODE(IWUSR); + P_MODE(IXUSR); + P_MODE(IRWXG); + P_MODE(IRGRP); + P_MODE(IWGRP); + P_MODE(IXGRP); + P_MODE(IRWXO); + P_MODE(IROTH); + P_MODE(IWOTH); + P_MODE(IXOTH); +#undef P_MODE + + if (mode) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", mode); + + return printed; +} + +#define SCA_MODE_T syscall_arg__scnprintf_mode_t diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c new file mode 100644 index 000000000000..07fa8a0acad6 --- /dev/null +++ b/tools/perf/trace/beauty/msg_flags.c @@ -0,0 +1,62 @@ +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef MSG_PROBE +#define MSG_PROBE 0x10 +#endif +#ifndef MSG_WAITFORONE +#define MSG_WAITFORONE 0x10000 +#endif +#ifndef MSG_SENDPAGE_NOTLAST +#define MSG_SENDPAGE_NOTLAST 0x20000 +#endif +#ifndef MSG_FASTOPEN +#define MSG_FASTOPEN 0x20000000 +#endif +#ifndef MSG_CMSG_CLOEXEC +# define MSG_CMSG_CLOEXEC 0x40000000 +#endif + +static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "NONE"); +#define P_MSG_FLAG(n) \ + if (flags & MSG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MSG_##n; \ + } + + P_MSG_FLAG(OOB); + P_MSG_FLAG(PEEK); + P_MSG_FLAG(DONTROUTE); + P_MSG_FLAG(TRYHARD); + P_MSG_FLAG(CTRUNC); + P_MSG_FLAG(PROBE); + P_MSG_FLAG(TRUNC); + P_MSG_FLAG(DONTWAIT); + P_MSG_FLAG(EOR); + P_MSG_FLAG(WAITALL); + P_MSG_FLAG(FIN); + P_MSG_FLAG(SYN); + P_MSG_FLAG(CONFIRM); + P_MSG_FLAG(RST); + P_MSG_FLAG(ERRQUEUE); + P_MSG_FLAG(NOSIGNAL); + P_MSG_FLAG(MORE); + P_MSG_FLAG(WAITFORONE); + P_MSG_FLAG(SENDPAGE_NOTLAST); + P_MSG_FLAG(FASTOPEN); + P_MSG_FLAG(CMSG_CLOEXEC); +#undef P_MSG_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c new file mode 100644 index 000000000000..0f3679e0cdcf --- /dev/null +++ b/tools/perf/trace/beauty/open_flags.c @@ -0,0 +1,56 @@ + +static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (!(flags & O_CREAT)) + arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ + + if (flags == 0) + return scnprintf(bf, size, "RDONLY"); +#define P_FLAG(n) \ + if (flags & O_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~O_##n; \ + } + + P_FLAG(APPEND); + P_FLAG(ASYNC); + P_FLAG(CLOEXEC); + P_FLAG(CREAT); + P_FLAG(DIRECT); + P_FLAG(DIRECTORY); + P_FLAG(EXCL); + P_FLAG(LARGEFILE); + P_FLAG(NOATIME); + P_FLAG(NOCTTY); +#ifdef O_NONBLOCK + P_FLAG(NONBLOCK); +#elif O_NDELAY + P_FLAG(NDELAY); +#endif +#ifdef O_PATH + P_FLAG(PATH); +#endif + P_FLAG(RDWR); +#ifdef O_DSYNC + if ((flags & O_SYNC) == O_SYNC) + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); + else { + P_FLAG(DSYNC); + } +#else + P_FLAG(SYNC); +#endif + P_FLAG(TRUNC); + P_FLAG(WRONLY); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c new file mode 100644 index 000000000000..311f09dd718d --- /dev/null +++ b/tools/perf/trace/beauty/perf_event_open.c @@ -0,0 +1,43 @@ +#ifndef PERF_FLAG_FD_NO_GROUP +# define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#endif + +#ifndef PERF_FLAG_FD_OUTPUT +# define PERF_FLAG_FD_OUTPUT (1UL << 1) +#endif + +#ifndef PERF_FLAG_PID_CGROUP +# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ +#endif + +#ifndef PERF_FLAG_FD_CLOEXEC +# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#endif + +static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return 0; + +#define P_FLAG(n) \ + if (flags & PERF_FLAG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~PERF_FLAG_##n; \ + } + + P_FLAG(FD_NO_GROUP); + P_FLAG(FD_OUTPUT); + P_FLAG(PID_CGROUP); + P_FLAG(FD_CLOEXEC); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c new file mode 100644 index 000000000000..07486ea65ae3 --- /dev/null +++ b/tools/perf/trace/beauty/pid.c @@ -0,0 +1,21 @@ +static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg) +{ + int pid = arg->val; + struct trace *trace = arg->trace; + size_t printed = scnprintf(bf, size, "%d", pid); + struct thread *thread = machine__findnew_thread(trace->host, pid, pid); + + if (thread != NULL) { + if (!thread->comm_set) + thread__set_comm_from_proc(thread); + + if (thread->comm_set) + printed += scnprintf(bf + printed, size - printed, + " (%s)", thread__comm_str(thread)); + thread__put(thread); + } + + return printed; +} + +#define SCA_PID syscall_arg__scnprintf_pid diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c new file mode 100644 index 000000000000..c205bc608b3c --- /dev/null +++ b/tools/perf/trace/beauty/sched_policy.c @@ -0,0 +1,44 @@ +#include <sched.h> + +/* + * Not defined anywhere else, probably, just to make sure we + * catch future flags + */ +#define SCHED_POLICY_MASK 0xff + +#ifndef SCHED_DEADLINE +#define SCHED_DEADLINE 6 +#endif + +static size_t syscall_arg__scnprintf_sched_policy(char *bf, size_t size, + struct syscall_arg *arg) +{ + const char *policies[] = { + "NORMAL", "FIFO", "RR", "BATCH", "ISO", "IDLE", "DEADLINE", + }; + size_t printed; + int policy = arg->val, + flags = policy & ~SCHED_POLICY_MASK; + + policy &= SCHED_POLICY_MASK; + if (policy <= SCHED_DEADLINE) + printed = scnprintf(bf, size, "%s", policies[policy]); + else + printed = scnprintf(bf, size, "%#x", policy); + +#define P_POLICY_FLAG(n) \ + if (flags & SCHED_##n) { \ + printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ + flags &= ~SCHED_##n; \ + } + + P_POLICY_FLAG(RESET_ON_FORK); +#undef P_POLICY_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "|%#x", flags); + + return printed; +} + +#define SCA_SCHED_POLICY syscall_arg__scnprintf_sched_policy diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c new file mode 100644 index 000000000000..213c5a7e3e92 --- /dev/null +++ b/tools/perf/trace/beauty/seccomp.c @@ -0,0 +1,52 @@ +#include <linux/seccomp.h> + +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg) +{ + int op = arg->val; + size_t printed = 0; + + switch (op) { +#define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break + P_SECCOMP_SET_MODE_OP(STRICT); + P_SECCOMP_SET_MODE_OP(FILTER); +#undef P_SECCOMP_SET_MODE_OP + default: printed = scnprintf(bf, size, "%#x", op); break; + } + + return printed; +} + +#define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op + +#ifndef SECCOMP_FILTER_FLAG_TSYNC +#define SECCOMP_FILTER_FLAG_TSYNC 1 +#endif + +static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & SECCOMP_FILTER_FLAG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~SECCOMP_FILTER_FLAG_##n; \ + } + + P_FLAG(TSYNC); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c new file mode 100644 index 000000000000..d3b0b1fab077 --- /dev/null +++ b/tools/perf/trace/beauty/signum.c @@ -0,0 +1,53 @@ + +static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) +{ + int sig = arg->val; + + switch (sig) { +#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) + P_SIGNUM(HUP); + P_SIGNUM(INT); + P_SIGNUM(QUIT); + P_SIGNUM(ILL); + P_SIGNUM(TRAP); + P_SIGNUM(ABRT); + P_SIGNUM(BUS); + P_SIGNUM(FPE); + P_SIGNUM(KILL); + P_SIGNUM(USR1); + P_SIGNUM(SEGV); + P_SIGNUM(USR2); + P_SIGNUM(PIPE); + P_SIGNUM(ALRM); + P_SIGNUM(TERM); + P_SIGNUM(CHLD); + P_SIGNUM(CONT); + P_SIGNUM(STOP); + P_SIGNUM(TSTP); + P_SIGNUM(TTIN); + P_SIGNUM(TTOU); + P_SIGNUM(URG); + P_SIGNUM(XCPU); + P_SIGNUM(XFSZ); + P_SIGNUM(VTALRM); + P_SIGNUM(PROF); + P_SIGNUM(WINCH); + P_SIGNUM(IO); + P_SIGNUM(PWR); + P_SIGNUM(SYS); +#ifdef SIGEMT + P_SIGNUM(EMT); +#endif +#ifdef SIGSTKFLT + P_SIGNUM(STKFLT); +#endif +#ifdef SIGSWI + P_SIGNUM(SWI); +#endif + default: break; + } + + return scnprintf(bf, size, "%#x", sig); +} + +#define SCA_SIGNUM syscall_arg__scnprintf_signum diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c new file mode 100644 index 000000000000..0a5ce818131c --- /dev/null +++ b/tools/perf/trace/beauty/socket_type.c @@ -0,0 +1,60 @@ +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef SOCK_DCCP +# define SOCK_DCCP 6 +#endif + +#ifndef SOCK_CLOEXEC +# define SOCK_CLOEXEC 02000000 +#endif + +#ifndef SOCK_NONBLOCK +# define SOCK_NONBLOCK 00004000 +#endif + +#ifndef SOCK_TYPE_MASK +#define SOCK_TYPE_MASK 0xf +#endif + +static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, struct syscall_arg *arg) +{ + size_t printed; + int type = arg->val, + flags = type & ~SOCK_TYPE_MASK; + + type &= SOCK_TYPE_MASK; + /* + * Can't use a strarray, MIPS may override for ABI reasons. + */ + switch (type) { +#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; + P_SK_TYPE(STREAM); + P_SK_TYPE(DGRAM); + P_SK_TYPE(RAW); + P_SK_TYPE(RDM); + P_SK_TYPE(SEQPACKET); + P_SK_TYPE(DCCP); + P_SK_TYPE(PACKET); +#undef P_SK_TYPE + default: + printed = scnprintf(bf, size, "%#x", type); + } + +#define P_SK_FLAG(n) \ + if (flags & SOCK_##n) { \ + printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ + flags &= ~SOCK_##n; \ + } + + P_SK_FLAG(CLOEXEC); + P_SK_FLAG(NONBLOCK); +#undef P_SK_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "|%#x", flags); + + return printed; +} + +#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c new file mode 100644 index 000000000000..7942724adec8 --- /dev/null +++ b/tools/perf/trace/beauty/waitid_options.c @@ -0,0 +1,26 @@ +#include <sys/types.h> +#include <sys/wait.h> + +static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, options = arg->val; + +#define P_OPTION(n) \ + if (options & W##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + options &= ~W##n; \ + } + + P_OPTION(NOHANG); + P_OPTION(UNTRACED); + P_OPTION(CONTINUED); +#undef P_OPTION + + if (options) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", options); + + return printed; +} + +#define SCA_WAITID_OPTIONS syscall_arg__scnprintf_waitid_options diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4b9816555946..538bae880bfe 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -337,7 +337,7 @@ static void callchain_node__init_have_children(struct callchain_node *node, chain = list_entry(node->val.next, struct callchain_list, list); chain->has_children = has_sibling; - if (node->val.next != node->val.prev) { + if (!list_empty(&node->val)) { chain = list_entry(node->val.prev, struct callchain_list, list); chain->has_children = !RB_EMPTY_ROOT(&node->rb_root); } @@ -1607,9 +1607,8 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); dummy_hpp.buf[ret] = '\0'; - rtrim(dummy_hpp.buf); - start = ltrim(dummy_hpp.buf); + start = trim(dummy_hpp.buf); ret = strlen(start); if (start != dummy_hpp.buf) @@ -1897,11 +1896,10 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, bool first = true; int ret; - if (symbol_conf.use_callchain) + if (symbol_conf.use_callchain) { folded_sign = hist_entry__folded(he); - - if (symbol_conf.use_callchain) printed += fprintf(fp, "%c ", folded_sign); + } hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) @@ -2137,7 +2135,7 @@ static int hists__browser_title(struct hists *hists, printed += snprintf(bf + printed, size - printed, ", UID: %s", hists->uid_filter_str); if (thread) { - if (sort__has_thread) { + if (hists__has(hists, thread)) { printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", (thread->comm_set ? thread__comm_str(thread) : ""), @@ -2322,7 +2320,8 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; - if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && + !hists__has(browser->hists, comm)) || thread == NULL) return 0; if (browser->hists->thread_filter) { @@ -2331,7 +2330,7 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - if (sort__has_thread) { + if (hists__has(browser->hists, thread)) { ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); @@ -2356,10 +2355,11 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, { int ret; - if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && + !hists__has(browser->hists, comm)) || thread == NULL) return 0; - if (sort__has_thread) { + if (hists__has(browser->hists, thread)) { ret = asprintf(optstr, "Zoom %s %s(%d) thread", browser->hists->thread_filter ? "out of" : "into", thread->comm_set ? thread__comm_str(thread) : "", @@ -2382,7 +2382,7 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { struct map *map = act->ms.map; - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (browser->hists->dso_filter) { @@ -2409,7 +2409,7 @@ static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", @@ -2431,10 +2431,10 @@ do_browse_map(struct hist_browser *browser __maybe_unused, } static int -add_map_opt(struct hist_browser *browser __maybe_unused, +add_map_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (asprintf(optstr, "Browse map details") < 0) @@ -2536,7 +2536,7 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, static int do_zoom_socket(struct hist_browser *browser, struct popup_action *act) { - if (!sort__has_socket || act->socket < 0) + if (!hists__has(browser->hists, socket) || act->socket < 0) return 0; if (browser->hists->socket_filter > -1) { @@ -2558,7 +2558,7 @@ static int add_socket_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, int socket_id) { - if (!sort__has_socket || socket_id < 0) + if (!hists__has(browser->hists, socket) || socket_id < 0) return 0; if (asprintf(optstr, "Zoom %s Processor Socket %d", @@ -2749,7 +2749,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, */ goto out_free_stack; case 'a': - if (!sort__has_sym) { + if (!hists__has(hists, sym)) { ui_browser__warning(&browser->b, delay_secs * 2, "Annotation is only available for symbolic views, " "include \"sym*\" in --sort to use it."); @@ -2912,7 +2912,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } - if (!sort__has_sym || browser->selection == NULL) + if (!hists__has(hists, sym) || browser->selection == NULL) goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { @@ -2956,7 +2956,7 @@ skip_annotation: goto skip_scripting; if (browser->he_selection) { - if (sort__has_thread && thread) { + if (hists__has(hists, thread) && thread) { nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], @@ -2971,7 +2971,7 @@ skip_annotation: * * See hist_browser__show_entry. */ - if (sort__has_sym && browser->selection->sym) { + if (hists__has(hists, sym) && browser->selection->sym) { nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index bd9bf7e343b1..932adfaa05af 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -55,7 +55,7 @@ static u64 he_get_acc_##_field(struct hist_entry *he) \ return he->stat_acc->_field; \ } \ \ -static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ +static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt, \ struct perf_hpp *hpp, \ struct hist_entry *he) \ { \ @@ -379,7 +379,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_tree_store_set(store, &iter, col_idx++, s, -1); } - if (symbol_conf.use_callchain && sort__has_sym) { + if (symbol_conf.use_callchain && hists__has(hists, sym)) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? h->stat_acc->period : h->stat.period; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 3baeaa6e71b5..af07ffb129ca 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -635,7 +635,7 @@ unsigned int hists__sort_list_width(struct hists *hists) ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); } - if (verbose && sort__has_sym) /* Addr + origin */ + if (verbose && hists__has(hists, sym)) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; return ret; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 7aff5acf3265..560eb47d56f9 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -569,9 +569,8 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, first_col = false; fmt->header(fmt, hpp, hists_to_evsel(hists)); - rtrim(hpp->buf); - header_width += fprintf(fp, "%s", ltrim(hpp->buf)); + header_width += fprintf(fp, "%s", trim(hpp->buf)); } } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index eea25e2424e9..8c6c8a0ca642 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,3 @@ -libperf-y += abspath.o libperf-y += alias.o libperf-y += annotate.o libperf-y += build-id.o @@ -9,6 +8,7 @@ libperf-y += env.o libperf-y += event.o libperf-y += evlist.o libperf-y += evsel.o +libperf-y += evsel_fprintf.o libperf-y += find_bit.o libperf-y += kallsyms.o libperf-y += levenshtein.o @@ -27,9 +27,9 @@ libperf-y += strlist.o libperf-y += strfilter.o libperf-y += top.o libperf-y += usage.o -libperf-y += wrapper.o libperf-y += dso.o libperf-y += symbol.o +libperf-y += symbol_fprintf.o libperf-y += color.o libperf-y += header.o libperf-y += callchain.o @@ -39,6 +39,7 @@ libperf-y += machine.o libperf-y += map.o libperf-y += pstack.o libperf-y += session.o +libperf-$(CONFIG_AUDIT) += syscalltbl.o libperf-y += ordered-events.o libperf-y += comm.o libperf-y += thread.o @@ -70,9 +71,9 @@ libperf-y += stat-shadow.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o -libperf-$(CONFIG_X86) += tsc.o -libperf-$(CONFIG_AUXTRACE) += tsc.o +libperf-y += tsc.o libperf-y += cloexec.o +libperf-y += call-path.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c deleted file mode 100644 index 0e76affe9c36..000000000000 --- a/tools/perf/util/abspath.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "cache.h" - -static const char *get_pwd_cwd(void) -{ - static char cwd[PATH_MAX + 1]; - char *pwd; - struct stat cwd_stat, pwd_stat; - if (getcwd(cwd, PATH_MAX) == NULL) - return NULL; - pwd = getenv("PWD"); - if (pwd && strcmp(pwd, cwd)) { - stat(cwd, &cwd_stat); - if (!stat(pwd, &pwd_stat) && - pwd_stat.st_dev == cwd_stat.st_dev && - pwd_stat.st_ino == cwd_stat.st_ino) { - strlcpy(cwd, pwd, PATH_MAX); - } - } - return cwd; -} - -const char *make_nonrelative_path(const char *path) -{ - static char buf[PATH_MAX + 1]; - - if (is_absolute_path(path)) { - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } else { - const char *cwd = get_pwd_cwd(); - if (!cwd) - die("Cannot determine the current working directory"); - if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } - return buf; -} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b795b6994144..4db73d5a0dbc 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1138,7 +1138,7 @@ fallback: if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) { - char bf[BUILD_ID_SIZE * 2 + 16] = " with build id "; + char bf[SBUILD_ID_SIZE + 15] = " with build id "; char *build_id_msg = NULL; if (dso->annotate_warned) @@ -1665,5 +1665,5 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) bool ui__has_annotation(void) { - return use_browser == 1 && sort__has_sym; + return use_browser == 1 && perf_hpp_list.sym; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cea323d9ee7e..9241f8c2b7e1 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -158,7 +158,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); int hist_entry__annotate(struct hist_entry *he, size_t privsize); -int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym); +int symbol__annotate_init(struct map *map, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ec164fe70718..c9169011e55e 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; + synth_opts->initial_skip = 0; } /* @@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->last_branch_sz = val; } break; + case 's': + synth_opts->initial_skip = strtoul(p, &endptr, 10); + if (p == endptr) + goto out_err; + p = endptr; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index e5a8e2d4f2af..767989e0e312 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -68,6 +68,7 @@ enum itrace_period_type { * @last_branch_sz: branch context size * @period: 'instructions' events period * @period_type: 'instructions' events period type + * @initial_skip: skip N events at the beginning. */ struct itrace_synth_opts { bool set; @@ -86,6 +87,7 @@ struct itrace_synth_opts { unsigned int last_branch_sz; unsigned long long period; enum itrace_period_type period_type; + unsigned long initial_skip; }; /** @@ -517,7 +519,7 @@ static inline void auxtrace__free(struct perf_session *session) static inline struct auxtrace_record * auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, - int *err __maybe_unused) + int *err) { *err = 0; return NULL; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 0967ce601931..493307d1414c 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -842,6 +842,58 @@ bpf_map_op__new(struct parse_events_term *term) return op; } +static struct bpf_map_op * +bpf_map_op__clone(struct bpf_map_op *op) +{ + struct bpf_map_op *newop; + + newop = memdup(op, sizeof(*op)); + if (!newop) { + pr_debug("Failed to alloc bpf_map_op\n"); + return NULL; + } + + INIT_LIST_HEAD(&newop->list); + if (op->key_type == BPF_MAP_KEY_RANGES) { + size_t memsz = op->k.array.nr_ranges * + sizeof(op->k.array.ranges[0]); + + newop->k.array.ranges = memdup(op->k.array.ranges, memsz); + if (!newop->k.array.ranges) { + pr_debug("Failed to alloc indices for map\n"); + free(newop); + return NULL; + } + } + + return newop; +} + +static struct bpf_map_priv * +bpf_map_priv__clone(struct bpf_map_priv *priv) +{ + struct bpf_map_priv *newpriv; + struct bpf_map_op *pos, *newop; + + newpriv = zalloc(sizeof(*newpriv)); + if (!newpriv) { + pr_debug("No enough memory to alloc map private\n"); + return NULL; + } + INIT_LIST_HEAD(&newpriv->ops_list); + + list_for_each_entry(pos, &priv->ops_list, list) { + newop = bpf_map_op__clone(pos); + if (!newop) { + bpf_map_priv__purge(newpriv); + return NULL; + } + list_add_tail(&newop->list, &newpriv->ops_list); + } + + return newpriv; +} + static int bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) { @@ -1417,6 +1469,89 @@ int bpf__apply_obj_config(void) return 0; } +#define bpf__for_each_map(pos, obj, objtmp) \ + bpf_object__for_each_safe(obj, objtmp) \ + bpf_map__for_each(pos, obj) + +#define bpf__for_each_stdout_map(pos, obj, objtmp) \ + bpf__for_each_map(pos, obj, objtmp) \ + if (bpf_map__get_name(pos) && \ + (strcmp("__bpf_stdout__", \ + bpf_map__get_name(pos)) == 0)) + +int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +{ + struct bpf_map_priv *tmpl_priv = NULL; + struct bpf_object *obj, *tmp; + struct perf_evsel *evsel = NULL; + struct bpf_map *map; + int err; + bool need_init = false; + + bpf__for_each_stdout_map(map, obj, tmp) { + struct bpf_map_priv *priv; + + err = bpf_map__get_private(map, (void **)&priv); + if (err) + return -BPF_LOADER_ERRNO__INTERNAL; + + /* + * No need to check map type: type should have been + * verified by kernel. + */ + if (!need_init && !priv) + need_init = !priv; + if (!tmpl_priv && priv) + tmpl_priv = priv; + } + + if (!need_init) + return 0; + + if (!tmpl_priv) { + err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/", + NULL); + if (err) { + pr_debug("ERROR: failed to create bpf-output event\n"); + return -err; + } + + evsel = perf_evlist__last(evlist); + } + + bpf__for_each_stdout_map(map, obj, tmp) { + struct bpf_map_priv *priv; + + err = bpf_map__get_private(map, (void **)&priv); + if (err) + return -BPF_LOADER_ERRNO__INTERNAL; + if (priv) + continue; + + if (tmpl_priv) { + priv = bpf_map_priv__clone(tmpl_priv); + if (!priv) + return -ENOMEM; + + err = bpf_map__set_private(map, priv, bpf_map_priv__clear); + if (err) { + bpf_map_priv__clear(map, priv); + return err; + } + } else if (evsel) { + struct bpf_map_op *op; + + op = bpf_map__add_newop(map, NULL); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_EVSEL; + op->v.evsel = evsel; + } + } + + return 0; +} + #define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) #define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) #define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) @@ -1590,3 +1725,11 @@ int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) bpf__strerror_end(buf, size); return 0; } + +int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused, + int err, char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_end(buf, size); + return 0; +} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index be4311944e3d..941e17275aa7 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -79,6 +79,11 @@ int bpf__strerror_config_obj(struct bpf_object *obj, size_t size); int bpf__apply_obj_config(void); int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); + +int bpf__setup_stdout(struct perf_evlist *evlist); +int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, + char *buf, size_t size); + #else static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, @@ -125,6 +130,12 @@ bpf__apply_obj_config(void) } static inline int +bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +{ + return 0; +} + +static inline int __bpf_strerror(char *buf, size_t size) { if (!size) @@ -177,5 +188,13 @@ bpf__strerror_apply_obj_config(int err __maybe_unused, { return __bpf_strerror(buf, size); } + +static inline int +bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused, + int err __maybe_unused, char *buf, + size_t size) +{ + return __bpf_strerror(buf, size); +} #endif #endif diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index f1479eeef7da..bff425e1232c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -28,7 +28,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, struct machine *machine) { struct addr_location al; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -38,7 +37,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) al.map->dso->hit = 1; @@ -262,14 +261,14 @@ static int machine__write_buildid_table(struct machine *machine, int fd) if (dso__is_vdso(pos)) { name = pos->short_name; - name_len = pos->short_name_len + 1; + name_len = pos->short_name_len; } else if (dso__is_kcore(pos)) { machine__mmap_name(machine, nm, sizeof(nm)); name = nm; - name_len = strlen(nm) + 1; + name_len = strlen(nm); } else { name = pos->long_name; - name_len = pos->long_name_len + 1; + name_len = pos->long_name_len; } in_kernel = pos->kernel || @@ -366,39 +365,17 @@ static char *build_id_cache__dirname_from_path(const char *name, int build_id_cache__list_build_ids(const char *pathname, struct strlist **result) { - struct strlist *list; char *dir_name; - DIR *dir; - struct dirent *d; int ret = 0; - list = strlist__new(NULL, NULL); dir_name = build_id_cache__dirname_from_path(pathname, false, false); - if (!list || !dir_name) { - ret = -ENOMEM; - goto out; - } + if (!dir_name) + return -ENOMEM; - /* List up all dirents */ - dir = opendir(dir_name); - if (!dir) { + *result = lsdir(dir_name, lsdir_no_dot_filter); + if (!*result) ret = -errno; - goto out; - } - - while ((d = readdir(dir)) != NULL) { - if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) - continue; - strlist__add(list, d->d_name); - } - closedir(dir); - -out: free(dir_name); - if (ret) - strlist__delete(list); - else - *result = list; return ret; } diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 3ca453f0c51f..0d814bb74661 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -26,51 +26,28 @@ extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); -extern int perf_default_config(const char *, const char *, void *); -extern int perf_config(config_fn_t fn, void *); -extern int perf_config_int(const char *, const char *); -extern u64 perf_config_u64(const char *, const char *); -extern int perf_config_bool(const char *, const char *); -extern int config_error_nonbool(const char *); -extern const char *perf_config_dirname(const char *, const char *); -extern const char *perf_etc_perfconfig(void); +int perf_default_config(const char *, const char *, void *); +int perf_config(config_fn_t fn, void *); +int perf_config_int(const char *, const char *); +u64 perf_config_u64(const char *, const char *); +int perf_config_bool(const char *, const char *); +int config_error_nonbool(const char *); +const char *perf_config_dirname(const char *, const char *); +const char *perf_etc_perfconfig(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); #define alloc_nr(x) (((x)+16)*3/2) -/* - * Realloc the buffer pointed at by variable 'x' so that it can hold - * at least 'nr' entries; the number of entries currently allocated - * is 'alloc', using the standard growing factor alloc_nr() macro. - * - * DO NOT USE any expression with side-effect for 'x' or 'alloc'. - */ -#define ALLOC_GROW(x, nr, alloc) \ - do { \ - if ((nr) > alloc) { \ - if (alloc_nr(alloc) < (nr)) \ - alloc = (nr); \ - else \ - alloc = alloc_nr(alloc); \ - x = xrealloc((x), alloc * sizeof(*(x))); \ - } \ - } while(0) - - static inline int is_absolute_path(const char *path) { return path[0] == '/'; } -const char *make_nonrelative_path(const char *path); char *strip_path_suffix(const char *path, const char *suffix); -extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); - -extern char *perf_pathdup(const char *fmt, ...) - __attribute__((format (printf, 1, 2))); +char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c new file mode 100644 index 000000000000..904a17052e38 --- /dev/null +++ b/tools/perf/util/call-path.c @@ -0,0 +1,122 @@ +/* + * call-path.h: Manipulate a tree data structure containing function call paths + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/rbtree.h> +#include <linux/list.h> + +#include "util.h" +#include "call-path.h" + +static void call_path__init(struct call_path *cp, struct call_path *parent, + struct symbol *sym, u64 ip, bool in_kernel) +{ + cp->parent = parent; + cp->sym = sym; + cp->ip = sym ? 0 : ip; + cp->db_id = 0; + cp->in_kernel = in_kernel; + RB_CLEAR_NODE(&cp->rb_node); + cp->children = RB_ROOT; +} + +struct call_path_root *call_path_root__new(void) +{ + struct call_path_root *cpr; + + cpr = zalloc(sizeof(struct call_path_root)); + if (!cpr) + return NULL; + call_path__init(&cpr->call_path, NULL, NULL, 0, false); + INIT_LIST_HEAD(&cpr->blocks); + return cpr; +} + +void call_path_root__free(struct call_path_root *cpr) +{ + struct call_path_block *pos, *n; + + list_for_each_entry_safe(pos, n, &cpr->blocks, node) { + list_del(&pos->node); + free(pos); + } + free(cpr); +} + +static struct call_path *call_path__new(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, + bool in_kernel) +{ + struct call_path_block *cpb; + struct call_path *cp; + size_t n; + + if (cpr->next < cpr->sz) { + cpb = list_last_entry(&cpr->blocks, struct call_path_block, + node); + } else { + cpb = zalloc(sizeof(struct call_path_block)); + if (!cpb) + return NULL; + list_add_tail(&cpb->node, &cpr->blocks); + cpr->sz += CALL_PATH_BLOCK_SIZE; + } + + n = cpr->next++ & CALL_PATH_BLOCK_MASK; + cp = &cpb->cp[n]; + + call_path__init(cp, parent, sym, ip, in_kernel); + + return cp; +} + +struct call_path *call_path__findnew(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, u64 ks) +{ + struct rb_node **p; + struct rb_node *node_parent = NULL; + struct call_path *cp; + bool in_kernel = ip >= ks; + + if (sym) + ip = 0; + + if (!parent) + return call_path__new(cpr, parent, sym, ip, in_kernel); + + p = &parent->children.rb_node; + while (*p != NULL) { + node_parent = *p; + cp = rb_entry(node_parent, struct call_path, rb_node); + + if (cp->sym == sym && cp->ip == ip) + return cp; + + if (sym < cp->sym || (sym == cp->sym && ip < cp->ip)) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + cp = call_path__new(cpr, parent, sym, ip, in_kernel); + if (!cp) + return NULL; + + rb_link_node(&cp->rb_node, node_parent, p); + rb_insert_color(&cp->rb_node, &parent->children); + + return cp; +} diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h new file mode 100644 index 000000000000..477f6d03b659 --- /dev/null +++ b/tools/perf/util/call-path.h @@ -0,0 +1,77 @@ +/* + * call-path.h: Manipulate a tree data structure containing function call paths + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_CALL_PATH_H +#define __PERF_CALL_PATH_H + +#include <sys/types.h> + +#include <linux/types.h> +#include <linux/rbtree.h> + +/** + * struct call_path - node in list of calls leading to a function call. + * @parent: call path to the parent function call + * @sym: symbol of function called + * @ip: only if sym is null, the ip of the function + * @db_id: id used for db-export + * @in_kernel: whether function is a in the kernel + * @rb_node: node in parent's tree of called functions + * @children: tree of call paths of functions called + * + * In combination with the call_return structure, the call_path structure + * defines a context-sensitve call-graph. + */ +struct call_path { + struct call_path *parent; + struct symbol *sym; + u64 ip; + u64 db_id; + bool in_kernel; + struct rb_node rb_node; + struct rb_root children; +}; + +#define CALL_PATH_BLOCK_SHIFT 8 +#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT) +#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1) + +struct call_path_block { + struct call_path cp[CALL_PATH_BLOCK_SIZE]; + struct list_head node; +}; + +/** + * struct call_path_root - root of all call paths. + * @call_path: root call path + * @blocks: list of blocks to store call paths + * @next: next free space + * @sz: number of spaces + */ +struct call_path_root { + struct call_path call_path; + struct list_head blocks; + size_t next; + size_t sz; +}; + +struct call_path_root *call_path_root__new(void); +void call_path_root__free(struct call_path_root *cpr); + +struct call_path *call_path__findnew(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, u64 ks); + +#endif diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 24b4bd0d7754..07fd30bc2f81 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -109,6 +109,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt) bool record_opt_set = false; bool try_stack_size = false; + callchain_param.enabled = true; symbol_conf.use_callchain = true; if (!arg) @@ -117,6 +118,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt) while ((tok = strtok((char *)arg, ",")) != NULL) { if (!strncmp(tok, "none", strlen(tok))) { callchain_param.mode = CHAIN_NONE; + callchain_param.enabled = false; symbol_conf.use_callchain = false; return 0; } @@ -788,7 +790,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, return 0; } -int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent, +int sample__resolve_callchain(struct perf_sample *sample, + struct callchain_cursor *cursor, struct symbol **parent, struct perf_evsel *evsel, struct addr_location *al, int max_stack) { @@ -796,8 +799,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent return 0; if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || - sort__has_parent) { - return thread__resolve_callchain(al->thread, evsel, sample, + perf_hpp_list.parent) { + return thread__resolve_callchain(al->thread, cursor, evsel, sample, parent, al, max_stack); } return 0; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 18dd22269764..65e2a4f7cb4e 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -212,7 +212,14 @@ struct hist_entry; int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset); int record_callchain_opt(const struct option *opt, const char *arg, int unset); -int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent, +struct record_opts; + +int record_opts__parse_callchain(struct record_opts *record, + struct callchain_param *callchain, + const char *arg, bool unset); + +int sample__resolve_callchain(struct perf_sample *sample, + struct callchain_cursor *cursor, struct symbol **parent, struct perf_evsel *evsel, struct addr_location *al, int max_stack); int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); @@ -220,7 +227,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * bool hide_unresolved); extern const char record_callchain_help[]; -extern int parse_callchain_record(const char *arg, struct callchain_param *param); +int parse_callchain_record(const char *arg, struct callchain_param *param); int parse_callchain_record_opt(const char *arg, struct callchain_param *param); int parse_callchain_report_opt(const char *arg); int parse_callchain_top_opt(const char *arg); @@ -236,7 +243,7 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, } #ifdef HAVE_SKIP_CALLCHAIN_IDX -extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); +int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); #else static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, struct ip_callchain *chain __maybe_unused) diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index b4b8cb42fe5e..31f8dcdbd7ef 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -13,7 +13,7 @@ struct cgroup_sel { extern int nr_cgroups; /* number of explicit cgroups defined */ -extern void close_cgroup(struct cgroup_sel *cgrp); -extern int parse_cgroups(const struct option *opt, const char *str, int unset); +void close_cgroup(struct cgroup_sel *cgrp); +int parse_cgroups(const struct option *opt, const char *str, int unset); #endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 3bee6773ddb0..d0d465953d36 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -5,7 +5,7 @@ unsigned long perf_event_open_cloexec_flag(void); #ifdef __GLIBC_PREREQ #if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) -extern int sched_getcpu(void) __THROW; +int sched_getcpu(void) __THROW; #endif #endif diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 4e727635476e..dad7d8272168 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -13,6 +13,7 @@ #include <subcmd/exec-cmd.h> #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ +#include "config.h" #define MAXNAME (256) @@ -377,6 +378,21 @@ const char *perf_config_dirname(const char *name, const char *value) return value; } +static int perf_buildid_config(const char *var, const char *value) +{ + /* same dir for all commands */ + if (!strcmp(var, "buildid.dir")) { + const char *dir = perf_config_dirname(var, value); + + if (!dir) + return -1; + strncpy(buildid_dir, dir, MAXPATHLEN-1); + buildid_dir[MAXPATHLEN-1] = '\0'; + } + + return 0; +} + static int perf_default_core_config(const char *var __maybe_unused, const char *value __maybe_unused) { @@ -412,6 +428,9 @@ int perf_default_config(const char *var, const char *value, if (!prefixcmp(var, "llvm.")) return perf_llvm_config(var, value); + if (!prefixcmp(var, "buildid.")) + return perf_buildid_config(var, value); + /* Add other config variables here. */ return 0; } @@ -506,41 +525,185 @@ out: return ret; } -/* - * Call this to report error for your variable that should not - * get a boolean value (i.e. "[my] var" means "true"). - */ -int config_error_nonbool(const char *var) +static struct perf_config_section *find_section(struct list_head *sections, + const char *section_name) { - return error("Missing value for '%s'", var); + struct perf_config_section *section; + + list_for_each_entry(section, sections, node) + if (!strcmp(section->name, section_name)) + return section; + + return NULL; +} + +static struct perf_config_item *find_config_item(const char *name, + struct perf_config_section *section) +{ + struct perf_config_item *item; + + list_for_each_entry(item, §ion->items, node) + if (!strcmp(item->name, name)) + return item; + + return NULL; } -struct buildid_dir_config { - char *dir; -}; +static struct perf_config_section *add_section(struct list_head *sections, + const char *section_name) +{ + struct perf_config_section *section = zalloc(sizeof(*section)); + + if (!section) + return NULL; + + INIT_LIST_HEAD(§ion->items); + section->name = strdup(section_name); + if (!section->name) { + pr_debug("%s: strdup failed\n", __func__); + free(section); + return NULL; + } + + list_add_tail(§ion->node, sections); + return section; +} -static int buildid_dir_command_config(const char *var, const char *value, - void *data) +static struct perf_config_item *add_config_item(struct perf_config_section *section, + const char *name) { - struct buildid_dir_config *c = data; - const char *v; + struct perf_config_item *item = zalloc(sizeof(*item)); - /* same dir for all commands */ - if (!strcmp(var, "buildid.dir")) { - v = perf_config_dirname(var, value); - if (!v) - return -1; - strncpy(c->dir, v, MAXPATHLEN-1); - c->dir[MAXPATHLEN-1] = '\0'; + if (!item) + return NULL; + + item->name = strdup(name); + if (!item->name) { + pr_debug("%s: strdup failed\n", __func__); + free(item); + return NULL; } + + list_add_tail(&item->node, §ion->items); + return item; +} + +static int set_value(struct perf_config_item *item, const char *value) +{ + char *val = strdup(value); + + if (!val) + return -1; + + zfree(&item->value); + item->value = val; return 0; } -static void check_buildid_dir_config(void) +static int collect_config(const char *var, const char *value, + void *perf_config_set) { - struct buildid_dir_config c; - c.dir = buildid_dir; - perf_config(buildid_dir_command_config, &c); + int ret = -1; + char *ptr, *key; + char *section_name, *name; + struct perf_config_section *section = NULL; + struct perf_config_item *item = NULL; + struct perf_config_set *set = perf_config_set; + struct list_head *sections = &set->sections; + + key = ptr = strdup(var); + if (!key) { + pr_debug("%s: strdup failed\n", __func__); + return -1; + } + + section_name = strsep(&ptr, "."); + name = ptr; + if (name == NULL || value == NULL) + goto out_free; + + section = find_section(sections, section_name); + if (!section) { + section = add_section(sections, section_name); + if (!section) + goto out_free; + } + + item = find_config_item(name, section); + if (!item) { + item = add_config_item(section, name); + if (!item) + goto out_free; + } + + ret = set_value(item, value); + return ret; + +out_free: + free(key); + perf_config_set__delete(set); + return -1; +} + +struct perf_config_set *perf_config_set__new(void) +{ + struct perf_config_set *set = zalloc(sizeof(*set)); + + if (set) { + INIT_LIST_HEAD(&set->sections); + perf_config(collect_config, set); + } + + return set; +} + +static void perf_config_item__delete(struct perf_config_item *item) +{ + zfree(&item->name); + zfree(&item->value); + free(item); +} + +static void perf_config_section__purge(struct perf_config_section *section) +{ + struct perf_config_item *item, *tmp; + + list_for_each_entry_safe(item, tmp, §ion->items, node) { + list_del_init(&item->node); + perf_config_item__delete(item); + } +} + +static void perf_config_section__delete(struct perf_config_section *section) +{ + perf_config_section__purge(section); + zfree(§ion->name); + free(section); +} + +static void perf_config_set__purge(struct perf_config_set *set) +{ + struct perf_config_section *section, *tmp; + + list_for_each_entry_safe(section, tmp, &set->sections, node) { + list_del_init(§ion->node); + perf_config_section__delete(section); + } +} + +void perf_config_set__delete(struct perf_config_set *set) +{ + perf_config_set__purge(set); + free(set); +} + +/* + * Call this to report error for your variable that should not + * get a boolean value (i.e. "[my] var" means "true"). + */ +int config_error_nonbool(const char *var) +{ + return error("Missing value for '%s'", var); } void set_buildid_dir(const char *dir) @@ -548,16 +711,13 @@ void set_buildid_dir(const char *dir) if (dir) scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir); - /* try config file */ - if (buildid_dir[0] == '\0') - check_buildid_dir_config(); - /* default to $HOME/.debug */ if (buildid_dir[0] == '\0') { - char *v = getenv("HOME"); - if (v) { + char *home = getenv("HOME"); + + if (home) { snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s", - v, DEBUG_CACHE_DIR); + home, DEBUG_CACHE_DIR); } else { strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1); } diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h new file mode 100644 index 000000000000..22ec626ac718 --- /dev/null +++ b/tools/perf/util/config.h @@ -0,0 +1,26 @@ +#ifndef __PERF_CONFIG_H +#define __PERF_CONFIG_H + +#include <stdbool.h> +#include <linux/list.h> + +struct perf_config_item { + char *name; + char *value; + struct list_head node; +}; + +struct perf_config_section { + char *name; + struct list_head items; + struct list_head node; +}; + +struct perf_config_set { + struct list_head sections; +}; + +struct perf_config_set *perf_config_set__new(void); +void perf_config_set__delete(struct perf_config_set *set); + +#endif /* __PERF_CONFIG_H */ diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 9bcf2bed3a6d..02d801670f30 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -587,3 +587,15 @@ int cpu__setup_cpunode_map(void) closedir(dir1); return 0; } + +bool cpu_map__has(struct cpu_map *cpus, int cpu) +{ + int i; + + for (i = 0; i < cpus->nr; ++i) { + if (cpus->map[i] == cpu) + return true; + } + + return false; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 81a2562aaa2b..1a0a35073ce1 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -66,4 +66,6 @@ int cpu__get_node(int cpu); int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, int (*f)(struct cpu_map *map, int cpu, void *data), void *data); + +bool cpu_map__has(struct cpu_map *cpus, int cpu); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 811af89ce0bb..bbf69d248ec5 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -632,7 +632,7 @@ static bool is_flush_needed(struct ctf_stream *cs) } static int process_sample_event(struct perf_tool *tool, - union perf_event *_event __maybe_unused, + union perf_event *_event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 1921942fc2e0..be83516155ee 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -136,3 +136,44 @@ ssize_t perf_data_file__write(struct perf_data_file *file, { return writen(file->fd, buf, size); } + +int perf_data_file__switch(struct perf_data_file *file, + const char *postfix, + size_t pos, bool at_exit) +{ + char *new_filepath; + int ret; + + if (check_pipe(file)) + return -EINVAL; + if (perf_data_file__is_read(file)) + return -EINVAL; + + if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0) + return -ENOMEM; + + /* + * Only fire a warning, don't return error, continue fill + * original file. + */ + if (rename(file->path, new_filepath)) + pr_warning("Failed to rename %s to %s\n", file->path, new_filepath); + + if (!at_exit) { + close(file->fd); + ret = perf_data_file__open(file); + if (ret < 0) + goto out; + + if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) { + ret = -errno; + pr_debug("Failed to lseek to %zu: %s", + pos, strerror(errno)); + goto out; + } + } + ret = file->fd; +out: + free(new_filepath); + return ret; +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 2b15d0c95c7f..ae510ce16cb1 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -46,5 +46,14 @@ int perf_data_file__open(struct perf_data_file *file); void perf_data_file__close(struct perf_data_file *file); ssize_t perf_data_file__write(struct perf_data_file *file, void *buf, size_t size); - +/* + * If at_exit is set, only rename current perf.data to + * perf.data.<postfix>, continue write on original file. + * Set at_exit when flushing the last output. + * + * Return value is fd of new output. + */ +int perf_data_file__switch(struct perf_data_file *file, + const char *postfix, + size_t pos, bool at_exit); #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 1c9689e4cc17..8d96c80cc67e 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -23,6 +23,8 @@ #include "event.h" #include "util.h" #include "thread-stack.h" +#include "callchain.h" +#include "call-path.h" #include "db-export.h" struct deferred_export { @@ -258,8 +260,7 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, if (!al->sym) { al->sym = symbol__new(al->addr, 0, 0, "unknown"); if (al->sym) - symbols__insert(&dso->symbols[al->map->type], - al->sym); + dso__insert_symbol(dso, al->map->type, al->sym); } if (al->sym) { @@ -276,6 +277,80 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, return 0; } +static struct call_path *call_path_from_sample(struct db_export *dbe, + struct machine *machine, + struct thread *thread, + struct perf_sample *sample, + struct perf_evsel *evsel) +{ + u64 kernel_start = machine__kernel_start(machine); + struct call_path *current = &dbe->cpr->call_path; + enum chain_order saved_order = callchain_param.order; + int err; + + if (!symbol_conf.use_callchain || !sample->callchain) + return NULL; + + /* + * Since the call path tree must be built starting with the root, we + * must use ORDER_CALL for call chain resolution, in order to process + * the callchain starting with the root node and ending with the leaf. + */ + callchain_param.order = ORDER_CALLER; + err = thread__resolve_callchain(thread, &callchain_cursor, evsel, + sample, NULL, NULL, + sysctl_perf_event_max_stack); + if (err) { + callchain_param.order = saved_order; + return NULL; + } + callchain_cursor_commit(&callchain_cursor); + + while (1) { + struct callchain_cursor_node *node; + struct addr_location al; + u64 dso_db_id = 0, sym_db_id = 0, offset = 0; + + memset(&al, 0, sizeof(al)); + + node = callchain_cursor_current(&callchain_cursor); + if (!node) + break; + /* + * Handle export of symbol and dso for this node by + * constructing an addr_location struct and then passing it to + * db_ids_from_al() to perform the export. + */ + al.sym = node->sym; + al.map = node->map; + al.machine = machine; + al.addr = node->ip; + + if (al.map && !al.sym) + al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION, + al.addr); + + db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset); + + /* add node to the call path tree if it doesn't exist */ + current = call_path__findnew(dbe->cpr, current, + al.sym, node->ip, + kernel_start); + + callchain_cursor_advance(&callchain_cursor); + } + + /* Reset the callchain order to its prior value. */ + callchain_param.order = saved_order; + + if (current == &dbe->cpr->call_path) { + /* Bail because the callchain was empty. */ + return NULL; + } + + return current; +} + int db_export__branch_type(struct db_export *dbe, u32 branch_type, const char *name) { @@ -329,11 +404,21 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, if (err) goto out_put; + if (dbe->cpr) { + struct call_path *cp = call_path_from_sample(dbe, al->machine, + thread, sample, + evsel); + if (cp) { + db_export__call_path(dbe, cp); + es.call_path_id = cp->db_id; + } + } + if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && sample_addr_correlates_sym(&evsel->attr)) { struct addr_location addr_al; - perf_event__preprocess_sample_addr(event, sample, thread, &addr_al); + thread__resolve(thread, &addr_al, sample); err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id, &es.addr_sym_db_id, &es.addr_offset); if (err) diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 25e22fd76aca..67bc6b8ad2d6 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -27,6 +27,7 @@ struct dso; struct perf_sample; struct addr_location; struct call_return_processor; +struct call_path_root; struct call_path; struct call_return; @@ -43,6 +44,7 @@ struct export_sample { u64 addr_dso_db_id; u64 addr_sym_db_id; u64 addr_offset; /* addr offset from symbol start */ + u64 call_path_id; }; struct db_export { @@ -64,6 +66,7 @@ struct db_export { int (*export_call_return)(struct db_export *dbe, struct call_return *cr); struct call_return_processor *crp; + struct call_path_root *cpr; u64 evsel_last_db_id; u64 machine_last_db_id; u64 thread_last_db_id; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 8e6395439ca0..3357479082ca 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -38,7 +38,7 @@ int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size) { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + char build_id_hex[SBUILD_ID_SIZE]; int ret = 0; size_t len; @@ -1301,7 +1301,7 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp) size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); return fprintf(fp, "%s", sbuild_id); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 45ec4d0a50ed..0953280629cf 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -162,6 +162,7 @@ struct dso { u8 loaded; u8 rel; u8 build_id[BUILD_ID_SIZE]; + u64 text_offset; const char *short_name; const char *long_name; u16 long_name_len; @@ -301,7 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, * TODO */ int dso__data_get_fd(struct dso *dso, struct machine *machine); -void dso__data_put_fd(struct dso *dso __maybe_unused); +void dso__data_put_fd(struct dso *dso); void dso__data_close(struct dso *dso); off_t dso__data_size(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index a509aa8433a1..a347b19c961a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -915,8 +915,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ - strbuf_addf(buf, "(function_type)"); - return 0; + return strbuf_add(buf, "(function_type)", 15); } else { if (!dwarf_diename(&type)) return -ENOENT; @@ -927,14 +926,10 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) else if (tag == DW_TAG_enumeration_type) tmp = "enum "; /* Write a base name */ - strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type)); - return 0; + return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type)); } ret = die_get_typename(&type, buf); - if (ret == 0) - strbuf_addf(buf, "%s", tmp); - - return ret; + return ret ? ret : strbuf_addstr(buf, tmp); } /** @@ -951,14 +946,13 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - strbuf_addf(buf, "(unknown_type)"); + ret = strbuf_add(buf, " (unknown_type)", 14); } - strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); - - return 0; + return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } +#ifdef HAVE_DWARF_GETLOCATIONS /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE @@ -998,22 +992,24 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die, } while ((offset = dwarf_ranges(&scopes[1], offset, &base, - &start, &end)) > 0) { + &start, &end)) > 0) { start -= entry; end -= entry; if (first) { - strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, - name, start, end); + ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, + name, start, end); first = false; } else { - strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, - start, end); + ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, + start, end); } + if (ret < 0) + goto out; } if (!first) - strbuf_addf(buf, "]>"); + ret = strbuf_add(buf, "]>", 2); out: free(scopes); @@ -1053,30 +1049,39 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) return -EINVAL; - while ((offset = dwarf_getlocations( - &attr, offset, &base, - &start, &end, &op, &nops)) > 0) { + while ((offset = dwarf_getlocations(&attr, offset, &base, + &start, &end, &op, &nops)) > 0) { if (start == 0) { /* Single Location Descriptions */ ret = die_get_var_innermost_scope(sp_die, vr_die, buf); - return ret; + goto out; } /* Location Lists */ start -= entry; end -= entry; if (first) { - strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, - name, start, end); + ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, + name, start, end); first = false; } else { - strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, - start, end); + ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, + start, end); } + if (ret < 0) + goto out; } if (!first) - strbuf_addf(buf, "]>"); - + ret = strbuf_add(buf, "]>", 2); +out: return ret; } +#else +int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, + Dwarf_Die *vr_die __maybe_unused, + struct strbuf *buf __maybe_unused) +{ + return -ENOTSUP; +} +#endif diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index c42ec366f2a7..dc0ce1adb075 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -25,48 +25,48 @@ #include <elfutils/version.h> /* Find the realpath of the target file */ -extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); +const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); /* Get DW_AT_comp_dir (should be NULL with older gcc) */ -extern const char *cu_get_comp_dir(Dwarf_Die *cu_die); +const char *cu_get_comp_dir(Dwarf_Die *cu_die); /* Get a line number and file name for given address */ -extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, - const char **fname, int *lineno); +int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, + const char **fname, int *lineno); /* Walk on funcitons at given address */ -extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, - int (*callback)(Dwarf_Die *, void *), void *data); +int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, + int (*callback)(Dwarf_Die *, void *), void *data); /* Ensure that this DIE is a subprogram and definition (not declaration) */ -extern bool die_is_func_def(Dwarf_Die *dw_die); +bool die_is_func_def(Dwarf_Die *dw_die); /* Ensure that this DIE is an instance of a subprogram */ -extern bool die_is_func_instance(Dwarf_Die *dw_die); +bool die_is_func_instance(Dwarf_Die *dw_die); /* Compare diename and tname */ -extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); +bool die_compare_name(Dwarf_Die *dw_die, const char *tname); /* Matching diename with glob pattern */ -extern bool die_match_name(Dwarf_Die *dw_die, const char *glob); +bool die_match_name(Dwarf_Die *dw_die, const char *glob); /* Get callsite line number of inline-function instance */ -extern int die_get_call_lineno(Dwarf_Die *in_die); +int die_get_call_lineno(Dwarf_Die *in_die); /* Get callsite file name of inlined function instance */ -extern const char *die_get_call_file(Dwarf_Die *in_die); +const char *die_get_call_file(Dwarf_Die *in_die); /* Get type die */ -extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Get a type die, but skip qualifiers and typedef */ -extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Check whether the DIE is signed or not */ -extern bool die_is_signed_type(Dwarf_Die *tp_die); +bool die_is_signed_type(Dwarf_Die *tp_die); /* Get data_member_location offset */ -extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs); +int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs); /* Return values for die_find_child() callbacks */ enum { @@ -77,29 +77,29 @@ enum { }; /* Search child DIEs */ -extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die, - int (*callback)(Dwarf_Die *, void *), - void *data, Dwarf_Die *die_mem); +Dwarf_Die *die_find_child(Dwarf_Die *rt_die, + int (*callback)(Dwarf_Die *, void *), + void *data, Dwarf_Die *die_mem); /* Search a non-inlined function including given address */ -extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Search a non-inlined function with tail call at given address */ Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, Dwarf_Die *die_mem); /* Search the top inlined function including given address */ -extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Search the deepest inlined function including given address */ -extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Walk on the instances of given DIE */ -extern int die_walk_instances(Dwarf_Die *in_die, - int (*callback)(Dwarf_Die *, void *), void *data); +int die_walk_instances(Dwarf_Die *in_die, + int (*callback)(Dwarf_Die *, void *), void *data); /* Walker on lines (Note: line number will not be sorted) */ typedef int (* line_walk_callback_t) (const char *fname, int lineno, @@ -109,22 +109,20 @@ typedef int (* line_walk_callback_t) (const char *fname, int lineno, * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on * the lines inside the subprogram, otherwise the DIE must be a CU DIE. */ -extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, - void *data); +int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data); /* Find a variable called 'name' at given address */ -extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, - Dwarf_Addr addr, Dwarf_Die *die_mem); +Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, + Dwarf_Addr addr, Dwarf_Die *die_mem); /* Find a member called 'name' */ -extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, + Dwarf_Die *die_mem); /* Get the name of given variable DIE */ -extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ -extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, - struct strbuf *buf); +int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); #endif diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 7bad5c3fa7b7..f6fcc6832949 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -45,6 +45,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_STAT] = "STAT", [PERF_RECORD_STAT_ROUND] = "STAT_ROUND", [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", + [PERF_RECORD_TIME_CONV] = "TIME_CONV", }; const char *perf_event__name(unsigned int id) @@ -56,13 +57,22 @@ const char *perf_event__name(unsigned int id) return perf_event__names[id]; } -static struct perf_sample synth_sample = { +static int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process) +{ + struct perf_sample synth_sample = { .pid = -1, .tid = -1, .time = -1, .stream_id = -1, .cpu = -1, .period = 1, + .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK, + }; + + return process(tool, event, &synth_sample, machine); }; /* @@ -186,7 +196,7 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) return -1; - if (process(tool, event, &synth_sample, machine) != 0) + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) return -1; return tgid; @@ -218,7 +228,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); - if (process(tool, event, &synth_sample, machine) != 0) + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) return -1; return 0; @@ -344,7 +354,7 @@ out: event->mmap2.pid = tgid; event->mmap2.tid = pid; - if (process(tool, event, &synth_sample, machine) != 0) { + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; } @@ -402,7 +412,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, memcpy(event->mmap.filename, pos->dso->long_name, pos->dso->long_name_len + 1); - if (process(tool, event, &synth_sample, machine) != 0) { + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; } @@ -424,7 +434,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, { char filename[PATH_MAX]; DIR *tasks; - struct dirent dirent, *next; + struct dirent *dirent; pid_t tgid, ppid; int rc = 0; @@ -453,11 +463,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, return 0; } - while (!readdir_r(tasks, &dirent, &next) && next) { + while ((dirent = readdir(tasks)) != NULL) { char *end; pid_t _pid; - _pid = strtol(dirent.d_name, &end, 10); + _pid = strtol(dirent->d_name, &end, 10); if (*end) continue; @@ -472,7 +482,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, /* * Send the prepared comm event */ - if (process(tool, comm_event, &synth_sample, machine) != 0) + if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0) break; rc = 0; @@ -566,7 +576,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, { DIR *proc; char proc_path[PATH_MAX]; - struct dirent dirent, *next; + struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; int err = -1; @@ -591,9 +601,9 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (proc == NULL) goto out_free_fork; - while (!readdir_r(proc, &dirent, &next) && next) { + while ((dirent = readdir(proc)) != NULL) { char *end; - pid_t pid = strtol(dirent.d_name, &end, 10); + pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; @@ -701,7 +711,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; - err = process(tool, event, &synth_sample, machine); + err = perf_tool__process_synth_event(tool, event, machine, process); free(event); return err; @@ -1295,12 +1305,9 @@ void thread__find_addr_location(struct thread *thread, * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() */ -int perf_event__preprocess_sample(const union perf_event *event, - struct machine *machine, - struct addr_location *al, - struct perf_sample *sample) +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -1315,11 +1322,11 @@ int perf_event__preprocess_sample(const union perf_event *event, * events, but for older perf.data files there was no such thing, so do * it now. */ - if (cpumode == PERF_RECORD_MISC_KERNEL && + if (sample->cpumode == PERF_RECORD_MISC_KERNEL && machine__kernel_map(machine) == NULL) machine__create_kernel_maps(machine); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : "<not found>"); @@ -1395,16 +1402,12 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) return false; } -void perf_event__preprocess_sample_addr(union perf_event *event, - struct perf_sample *sample, - struct thread *thread, - struct addr_location *al) +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al); if (!al->map) - thread__find_addr_map(thread, cpumode, MAP__VARIABLE, + thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE, sample->addr, al); al->cpu = sample->cpu; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b7ffb7ee9971..8d363d5e65a2 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -192,6 +192,7 @@ struct perf_sample { u64 data_src; u32 flags; u16 insn_len; + u8 cpumode; void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; @@ -232,6 +233,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_STAT = 76, PERF_RECORD_STAT_ROUND = 77, PERF_RECORD_EVENT_UPDATE = 78, + PERF_RECORD_TIME_CONV = 79, PERF_RECORD_HEADER_MAX }; @@ -468,6 +470,13 @@ struct stat_round_event { u64 time; }; +struct time_conv_event { + struct perf_event_header header; + u64 time_shift; + u64 time_mult; + u64 time_zero; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -496,6 +505,7 @@ union perf_event { struct stat_config_event stat_config; struct stat_event stat; struct stat_round_event stat_round; + struct time_conv_event time_conv; }; void perf_event__print_totals(void); @@ -597,10 +607,8 @@ int perf_event__process(struct perf_tool *tool, struct addr_location; -int perf_event__preprocess_sample(const union perf_event *event, - struct machine *machine, - struct addr_location *al, - struct perf_sample *sample); +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample); void addr_location__put(struct addr_location *al); @@ -608,10 +616,8 @@ struct thread; bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); -void perf_event__preprocess_sample_addr(union perf_event *event, - struct perf_sample *sample, - struct thread *thread, - struct addr_location *al); +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample); const char *perf_event__name(unsigned int id); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 86a03836a83f..c4bfe11479a0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -679,53 +679,52 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, return NULL; } -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +/* When check_messup is true, 'end' must points to a good entry */ +static union perf_event * +perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, + u64 end, u64 *prev) { - struct perf_mmap *md = &evlist->mmap[idx]; - u64 head; - u64 old = md->prev; unsigned char *data = md->base + page_size; union perf_event *event = NULL; + int diff = end - start; - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!atomic_read(&md->refcnt)) - return NULL; - - head = perf_mmap__read_head(md); - if (evlist->overwrite) { + if (check_messup) { /* * If we're further behind than half the buffer, there's a chance * the writer will bite our tail and mess up the samples under us. * - * If we somehow ended up ahead of the head, we got messed up. + * If we somehow ended up ahead of the 'end', we got messed up. * - * In either case, truncate and restart at head. + * In either case, truncate and restart at 'end'. */ - int diff = head - old; if (diff > md->mask / 2 || diff < 0) { fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); /* - * head points to a known good entry, start there. + * 'end' points to a known good entry, start there. */ - old = head; + start = end; + diff = 0; } } - if (old != head) { + if (diff >= (int)sizeof(event->header)) { size_t size; - event = (union perf_event *)&data[old & md->mask]; + event = (union perf_event *)&data[start & md->mask]; size = event->header.size; + if (size < sizeof(event->header) || diff < (int)size) { + event = NULL; + goto broken_event; + } + /* * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ - if ((old & md->mask) + size != ((old + size) & md->mask)) { - unsigned int offset = old; + if ((start & md->mask) + size != ((start + size) & md->mask)) { + unsigned int offset = start; unsigned int len = min(sizeof(*event), size), cpy; void *dst = md->event_copy; @@ -740,14 +739,83 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) event = (union perf_event *) md->event_copy; } - old += size; + start += size; } - md->prev = old; +broken_event: + if (prev) + *prev = start; return event; } +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +{ + struct perf_mmap *md = &evlist->mmap[idx]; + u64 head; + u64 old = md->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!atomic_read(&md->refcnt)) + return NULL; + + head = perf_mmap__read_head(md); + + return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev); +} + +union perf_event * +perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) +{ + struct perf_mmap *md = &evlist->mmap[idx]; + u64 head, end; + u64 start = md->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!atomic_read(&md->refcnt)) + return NULL; + + head = perf_mmap__read_head(md); + if (!head) + return NULL; + + /* + * 'head' pointer starts from 0. Kernel minus sizeof(record) form + * it each time when kernel writes to it, so in fact 'head' is + * negative. 'end' pointer is made manually by adding the size of + * the ring buffer to 'head' pointer, means the validate data can + * read is the whole ring buffer. If 'end' is positive, the ring + * buffer has not fully filled, so we must adjust 'end' to 0. + * + * However, since both 'head' and 'end' is unsigned, we can't + * simply compare 'end' against 0. Here we compare '-head' and + * the size of the ring buffer, where -head is the number of bytes + * kernel write to the ring buffer. + */ + if (-head < (u64)(md->mask + 1)) + end = 0; + else + end = head + md->mask + 1; + + return perf_mmap__read(md, false, start, end, &md->prev); +} + +void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) +{ + struct perf_mmap *md = &evlist->mmap[idx]; + u64 head; + + if (!atomic_read(&md->refcnt)) + return; + + head = perf_mmap__read_head(md); + md->prev = head; +} + static bool perf_mmap__empty(struct perf_mmap *md) { return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; @@ -986,26 +1054,34 @@ out_unmap: return -1; } -static size_t perf_evlist__mmap_size(unsigned long pages) +unsigned long perf_event_mlock_kb_in_pages(void) { - if (pages == UINT_MAX) { - int max; + unsigned long pages; + int max; - if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { - /* - * Pick a once upon a time good value, i.e. things look - * strange since we can't read a sysctl value, but lets not - * die yet... - */ - max = 512; - } else { - max -= (page_size / 1024); - } + if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { + /* + * Pick a once upon a time good value, i.e. things look + * strange since we can't read a sysctl value, but lets not + * die yet... + */ + max = 512; + } else { + max -= (page_size / 1024); + } - pages = (max * 1024) / page_size; - if (!is_power_of_2(pages)) - pages = rounddown_pow_of_two(pages); - } else if (!is_power_of_2(pages)) + pages = (max * 1024) / page_size; + if (!is_power_of_2(pages)) + pages = rounddown_pow_of_two(pages); + + return pages; +} + +static size_t perf_evlist__mmap_size(unsigned long pages) +{ + if (pages == UINT_MAX) + pages = perf_event_mlock_kb_in_pages(); + else if (!is_power_of_2(pages)) return 0; return (pages + 1) * page_size; @@ -1192,6 +1268,24 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, perf_evlist__propagate_maps(evlist); } +void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + __perf_evsel__set_sample_bit(evsel, bit); +} + +void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + __perf_evsel__reset_sample_bit(evsel, bit); +} + int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a0d15221db6e..85d1b59802e8 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -87,6 +87,17 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist); int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); +void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit); +void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit); + +#define perf_evlist__set_sample_bit(evlist, bit) \ + __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit) + +#define perf_evlist__reset_sample_bit(evlist, bit) \ + __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit) + int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); @@ -118,16 +129,23 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); +union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, + int idx); +void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx); + void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__close(struct perf_evlist *evlist); +struct callchain_param; + void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); bool perf_can_record_switch_events(void); bool perf_can_record_cpu_wide(void); -void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); +void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts, + struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); int perf_evlist__prepare_workload(struct perf_evlist *evlist, @@ -144,6 +162,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset); +unsigned long perf_event_mlock_kb_in_pages(void); + int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, bool overwrite, unsigned int auxtrace_pages, bool auxtrace_overwrite); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0902fe418754..52c7d8884741 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -226,7 +226,8 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) perf_evsel__init(evsel, attr, idx); if (perf_evsel__is_bpf_output(evsel)) { - evsel->attr.sample_type |= PERF_SAMPLE_RAW; + evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), evsel->attr.sample_period = 1; } @@ -561,10 +562,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) return ret; } -static void -perf_evsel__config_callgraph(struct perf_evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +void perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; @@ -704,7 +704,7 @@ static void apply_config_terms(struct perf_evsel *evsel, /* set perf-event callgraph */ if (param.enabled) - perf_evsel__config_callgraph(evsel, opts, ¶m); + perf_evsel__config_callchain(evsel, opts, ¶m); } } @@ -736,7 +736,8 @@ static void apply_config_terms(struct perf_evsel *evsel, * enable/disable events specifically, as there's no * initial traced exec call. */ -void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) +void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain) { struct perf_evsel *leader = evsel->leader; struct perf_event_attr *attr = &evsel->attr; @@ -811,8 +812,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (perf_evsel__is_function_event(evsel)) evsel->attr.exclude_callchain_user = 1; - if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel, opts, &callchain_param); + if (callchain && callchain->enabled && !evsel->no_aux_samples) + perf_evsel__config_callchain(evsel, opts, callchain); if (opts->sample_intr_regs) { attr->sample_regs_intr = opts->sample_intr_regs; @@ -826,7 +827,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) perf_evsel__set_sample_bit(evsel, PERIOD); /* - * When the user explicitely disabled time don't force it here. + * When the user explicitly disabled time don't force it here. */ if (opts->sample_time && (!perf_missing_features.sample_id_all && @@ -1230,6 +1231,21 @@ static void __p_sample_type(char *buf, size_t size, u64 value) __p_bits(buf, size, value, bits); } +static void __p_branch_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n } + struct bit_names bits[] = { + bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY), + bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL), + bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), + bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), + bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + static void __p_read_format(char *buf, size_t size, u64 value) { #define bit_name(n) { PERF_FORMAT_##n, #n } @@ -1248,6 +1264,7 @@ static void __p_read_format(char *buf, size_t size, u64 value) #define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) #define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) +#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) #define PRINT_ATTRn(_n, _f, _p) \ @@ -1299,12 +1316,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(comm_exec, p_unsigned); PRINT_ATTRf(use_clockid, p_unsigned); PRINT_ATTRf(context_switch, p_unsigned); + PRINT_ATTRf(write_backward, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); - PRINT_ATTRf(branch_sample_type, p_unsigned); + PRINT_ATTRf(branch_sample_type, p_branch_sample_type); PRINT_ATTRf(sample_regs_user, p_hex); PRINT_ATTRf(sample_stack_user, p_unsigned); PRINT_ATTRf(clockid, p_signed); @@ -1643,6 +1661,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->weight = 0; + data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) @@ -2252,98 +2271,11 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, return 0; } -static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) -{ - va_list args; - int ret = 0; - - if (!*first) { - ret += fprintf(fp, ","); - } else { - ret += fprintf(fp, ":"); - *first = false; - } - - va_start(args, fmt); - ret += vfprintf(fp, fmt, args); - va_end(args); - return ret; -} - -static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv) -{ - return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); -} - -int perf_evsel__fprintf(struct perf_evsel *evsel, - struct perf_attr_details *details, FILE *fp) -{ - bool first = true; - int printed = 0; - - if (details->event_group) { - struct perf_evsel *pos; - - if (!perf_evsel__is_group_leader(evsel)) - return 0; - - if (evsel->nr_members > 1) - printed += fprintf(fp, "%s{", evsel->group_name ?: ""); - - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); - for_each_group_member(pos, evsel) - printed += fprintf(fp, ",%s", perf_evsel__name(pos)); - - if (evsel->nr_members > 1) - printed += fprintf(fp, "}"); - goto out; - } - - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); - - if (details->verbose) { - printed += perf_event_attr__fprintf(fp, &evsel->attr, - __print_attr__fprintf, &first); - } else if (details->freq) { - const char *term = "sample_freq"; - - if (!evsel->attr.freq) - term = "sample_period"; - - printed += comma_fprintf(fp, &first, " %s=%" PRIu64, - term, (u64)evsel->attr.sample_freq); - } - - if (details->trace_fields) { - struct format_field *field; - - if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { - printed += comma_fprintf(fp, &first, " (not a tracepoint)"); - goto out; - } - - field = evsel->tp_format->format.fields; - if (field == NULL) { - printed += comma_fprintf(fp, &first, " (no trace field)"); - goto out; - } - - printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name); - - field = field->next; - while (field) { - printed += comma_fprintf(fp, &first, "%s", field->name); - field = field->next; - } - } -out: - fputc('\n', fp); - return ++printed; -} - bool perf_evsel__fallback(struct perf_evsel *evsel, int err, char *msg, size_t msgsize) { + int paranoid; + if ((err == ENOENT || err == ENXIO || err == ENODEV) && evsel->attr.type == PERF_TYPE_HARDWARE && evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) { @@ -2363,6 +2295,22 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err, zfree(&evsel->name); return true; + } else if (err == EACCES && !evsel->attr.exclude_kernel && + (paranoid = perf_event_paranoid()) > 1) { + const char *name = perf_evsel__name(evsel); + char *new_name; + + if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? "" : ":") < 0) + return false; + + if (evsel->name) + free(evsel->name); + evsel->name = new_name; + scnprintf(msg, msgsize, +"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid); + evsel->attr.exclude_kernel = 1; + + return true; } return false; @@ -2381,12 +2329,13 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" "which controls use of the performance events system by\n" "unprivileged users (without CAP_SYS_ADMIN).\n\n" - "The default value is 1:\n\n" + "The current value is %d:\n\n" " -1: Allow use of (almost) all events by all users\n" ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN", - target->system_wide ? "system-wide " : ""); + target->system_wide ? "system-wide " : "", + perf_event_paranoid()); case ENOENT: return scnprintf(msg, size, "The %s event is not supported.", perf_evsel__name(evsel)); @@ -2396,10 +2345,18 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "Probably the maximum number of open file descriptors has been reached.\n" "Hint: Try again after reducing the number of events.\n" "Hint: Try increasing the limit with 'ulimit -n <limit>'"); + case ENOMEM: + if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 && + access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0) + return scnprintf(msg, size, + "Not enough memory to setup event with callchain.\n" + "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n" + "Hint: Current value: %d", sysctl_perf_event_max_stack); + break; case ENODEV: if (target->cpu_list) return scnprintf(msg, size, "%s", - "No such device - did you specify an out-of-range profile CPU?\n"); + "No such device - did you specify an out-of-range profile CPU?"); break; case EOPNOTSUPP: if (evsel->attr.precise_ip) @@ -2431,7 +2388,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" "/bin/dmesg may provide additional information.\n" - "No CONFIG_PERF_EVENTS=y kernel support configured?\n", + "No CONFIG_PERF_EVENTS=y kernel support configured?", err, strerror_r(err, sbuf, sizeof(sbuf)), perf_evsel__name(evsel)); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 501ea6e565f1..8a644fef452c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -178,8 +178,14 @@ void perf_evsel__init(struct perf_evsel *evsel, void perf_evsel__exit(struct perf_evsel *evsel); void perf_evsel__delete(struct perf_evsel *evsel); +struct callchain_param; + void perf_evsel__config(struct perf_evsel *evsel, - struct record_opts *opts); + struct record_opts *opts, + struct callchain_param *callchain); +void perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *callchain); int __perf_evsel__sample_size(u64 sample_type); void perf_evsel__calc_id_pos(struct perf_evsel *evsel); @@ -381,6 +387,24 @@ struct perf_attr_details { int perf_evsel__fprintf(struct perf_evsel *evsel, struct perf_attr_details *details, FILE *fp); +#define EVSEL__PRINT_IP (1<<0) +#define EVSEL__PRINT_SYM (1<<1) +#define EVSEL__PRINT_DSO (1<<2) +#define EVSEL__PRINT_SYMOFFSET (1<<3) +#define EVSEL__PRINT_ONELINE (1<<4) +#define EVSEL__PRINT_SRCLINE (1<<5) +#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) + +struct callchain_cursor; + +int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, + unsigned int print_opts, + struct callchain_cursor *cursor, FILE *fp); + +int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, + int left_alignment, unsigned int print_opts, + struct callchain_cursor *cursor, FILE *fp); + bool perf_evsel__fallback(struct perf_evsel *evsel, int err, char *msg, size_t msgsize); int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, @@ -396,7 +420,7 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) -static inline bool has_branch_callstack(struct perf_evsel *evsel) +static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel) { return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c new file mode 100644 index 000000000000..3674e77ad640 --- /dev/null +++ b/tools/perf/util/evsel_fprintf.c @@ -0,0 +1,212 @@ +#include <stdio.h> +#include <stdbool.h> +#include <traceevent/event-parse.h> +#include "evsel.h" +#include "callchain.h" +#include "map.h" +#include "symbol.h" + +static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) +{ + va_list args; + int ret = 0; + + if (!*first) { + ret += fprintf(fp, ","); + } else { + ret += fprintf(fp, ":"); + *first = false; + } + + va_start(args, fmt); + ret += vfprintf(fp, fmt, args); + va_end(args); + return ret; +} + +static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv) +{ + return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); +} + +int perf_evsel__fprintf(struct perf_evsel *evsel, + struct perf_attr_details *details, FILE *fp) +{ + bool first = true; + int printed = 0; + + if (details->event_group) { + struct perf_evsel *pos; + + if (!perf_evsel__is_group_leader(evsel)) + return 0; + + if (evsel->nr_members > 1) + printed += fprintf(fp, "%s{", evsel->group_name ?: ""); + + printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + for_each_group_member(pos, evsel) + printed += fprintf(fp, ",%s", perf_evsel__name(pos)); + + if (evsel->nr_members > 1) + printed += fprintf(fp, "}"); + goto out; + } + + printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + + if (details->verbose) { + printed += perf_event_attr__fprintf(fp, &evsel->attr, + __print_attr__fprintf, &first); + } else if (details->freq) { + const char *term = "sample_freq"; + + if (!evsel->attr.freq) + term = "sample_period"; + + printed += comma_fprintf(fp, &first, " %s=%" PRIu64, + term, (u64)evsel->attr.sample_freq); + } + + if (details->trace_fields) { + struct format_field *field; + + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { + printed += comma_fprintf(fp, &first, " (not a tracepoint)"); + goto out; + } + + field = evsel->tp_format->format.fields; + if (field == NULL) { + printed += comma_fprintf(fp, &first, " (no trace field)"); + goto out; + } + + printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name); + + field = field->next; + while (field) { + printed += comma_fprintf(fp, &first, "%s", field->name); + field = field->next; + } + } +out: + fputc('\n', fp); + return ++printed; +} + +int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, + unsigned int print_opts, struct callchain_cursor *cursor, + FILE *fp) +{ + int printed = 0; + struct callchain_cursor_node *node; + int print_ip = print_opts & EVSEL__PRINT_IP; + int print_sym = print_opts & EVSEL__PRINT_SYM; + int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; + int print_oneline = print_opts & EVSEL__PRINT_ONELINE; + int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; + int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; + char s = print_oneline ? ' ' : '\t'; + + if (sample->callchain) { + struct addr_location node_al; + + callchain_cursor_commit(cursor); + + while (1) { + u64 addr = 0; + + node = callchain_cursor_current(cursor); + if (!node) + break; + + if (node->sym && node->sym->ignore) + goto next; + + printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); + + if (print_ip) + printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); + + if (node->map) + addr = node->map->map_ip(node->map, node->ip); + + if (print_sym) { + printed += fprintf(fp, " "); + node_al.addr = addr; + node_al.map = node->map; + + if (print_symoffset) { + printed += __symbol__fprintf_symname_offs(node->sym, &node_al, + print_unknown_as_addr, fp); + } else { + printed += __symbol__fprintf_symname(node->sym, &node_al, + print_unknown_as_addr, fp); + } + } + + if (print_dso) { + printed += fprintf(fp, " ("); + printed += map__fprintf_dsoname(node->map, fp); + printed += fprintf(fp, ")"); + } + + if (print_srcline) + printed += map__fprintf_srcline(node->map, addr, "\n ", fp); + + if (!print_oneline) + printed += fprintf(fp, "\n"); +next: + callchain_cursor_advance(cursor); + } + } + + return printed; +} + +int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, + int left_alignment, unsigned int print_opts, + struct callchain_cursor *cursor, FILE *fp) +{ + int printed = 0; + int print_ip = print_opts & EVSEL__PRINT_IP; + int print_sym = print_opts & EVSEL__PRINT_SYM; + int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; + int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; + int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; + + if (cursor != NULL) { + printed += sample__fprintf_callchain(sample, left_alignment, + print_opts, cursor, fp); + } else if (!(al->sym && al->sym->ignore)) { + printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); + + if (print_ip) + printed += fprintf(fp, "%16" PRIx64, sample->ip); + + if (print_sym) { + printed += fprintf(fp, " "); + if (print_symoffset) { + printed += __symbol__fprintf_symname_offs(al->sym, al, + print_unknown_as_addr, fp); + } else { + printed += __symbol__fprintf_symname(al->sym, al, + print_unknown_as_addr, fp); + } + } + + if (print_dso) { + printed += fprintf(fp, " ("); + printed += map__fprintf_dsoname(al->map, fp); + printed += fprintf(fp, ")"); + } + + if (print_srcline) + printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); + } + + return printed; +} diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 45bf9c6d3257..2fbeb59c4bdd 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -2,45 +2,39 @@ #define __GENELF_H__ /* genelf.c */ -extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym, - const void *code, int csize, - void *debug, int nr_debug_entries); +int jit_write_elf(int fd, uint64_t code_addr, const char *sym, + const void *code, int csize, void *debug, int nr_debug_entries); /* genelf_debug.c */ -extern int jit_add_debug_info(Elf *e, uint64_t code_addr, - void *debug, int nr_debug_entries); +int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); #if defined(__arm__) #define GEN_ELF_ARCH EM_ARM -#define GEN_ELF_ENDIAN ELFDATA2LSB #define GEN_ELF_CLASS ELFCLASS32 #elif defined(__aarch64__) #define GEN_ELF_ARCH EM_AARCH64 -#define GEN_ELF_ENDIAN ELFDATA2LSB #define GEN_ELF_CLASS ELFCLASS64 #elif defined(__x86_64__) #define GEN_ELF_ARCH EM_X86_64 -#define GEN_ELF_ENDIAN ELFDATA2LSB #define GEN_ELF_CLASS ELFCLASS64 #elif defined(__i386__) #define GEN_ELF_ARCH EM_386 -#define GEN_ELF_ENDIAN ELFDATA2LSB #define GEN_ELF_CLASS ELFCLASS32 -#elif defined(__ppcle__) -#define GEN_ELF_ARCH EM_PPC -#define GEN_ELF_ENDIAN ELFDATA2LSB -#define GEN_ELF_CLASS ELFCLASS64 -#elif defined(__powerpc__) -#define GEN_ELF_ARCH EM_PPC64 -#define GEN_ELF_ENDIAN ELFDATA2MSB -#define GEN_ELF_CLASS ELFCLASS64 -#elif defined(__powerpcle__) +#elif defined(__powerpc64__) #define GEN_ELF_ARCH EM_PPC64 -#define GEN_ELF_ENDIAN ELFDATA2LSB #define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpc__) +#define GEN_ELF_ARCH EM_PPC +#define GEN_ELF_CLASS ELFCLASS32 #else #error "unsupported architecture" #endif +#if __BYTE_ORDER == __BIG_ENDIAN +#define GEN_ELF_ENDIAN ELFDATA2MSB +#else +#define GEN_ELF_ENDIAN ELFDATA2LSB +#endif + #if GEN_ELF_CLASS == ELFCLASS64 #define elf_newehdr elf64_newehdr #define elf_getshdr elf64_getshdr diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 73e38e472ecd..08852dde1378 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1474,7 +1474,7 @@ static int __event_process_build_id(struct build_id_event *bev, dso = machine__findnew_dso(machine, filename); if (dso != NULL) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; dso__set_build_id(dso, &bev->build_id); @@ -1819,7 +1819,8 @@ static int process_cpu_topology(struct perf_file_section *section, ph->env.nr_sibling_cores = nr; size += sizeof(u32); - strbuf_init(&sb, 128); + if (strbuf_init(&sb, 128) < 0) + goto free_cpu; for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); @@ -1827,7 +1828,8 @@ static int process_cpu_topology(struct perf_file_section *section, goto error; /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + if (strbuf_add(&sb, str, strlen(str) + 1) < 0) + goto error; size += string_size(str); free(str); } @@ -1849,7 +1851,8 @@ static int process_cpu_topology(struct perf_file_section *section, goto error; /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + if (strbuf_add(&sb, str, strlen(str) + 1) < 0) + goto error; size += string_size(str); free(str); } @@ -1872,11 +1875,6 @@ static int process_cpu_topology(struct perf_file_section *section, if (ph->needs_swap) nr = bswap_32(nr); - if (nr > (u32)cpu_nr) { - pr_debug("core_id number is too big." - "You may need to upgrade the perf tool.\n"); - goto free_cpu; - } ph->env.cpu[i].core_id = nr; ret = readn(fd, &nr, sizeof(nr)); @@ -1917,13 +1915,14 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse /* nr nodes */ ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) - goto error; + return -1; if (ph->needs_swap) nr = bswap_32(nr); ph->env.nr_numa_nodes = nr; - strbuf_init(&sb, 256); + if (strbuf_init(&sb, 256) < 0) + return -1; for (i = 0; i < nr; i++) { /* node number */ @@ -1945,15 +1944,17 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse mem_free = bswap_64(mem_free); } - strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":", - node, mem_total, mem_free); + if (strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":", + node, mem_total, mem_free) < 0) + goto error; str = do_read_string(fd, ph); if (!str) goto error; /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + if (strbuf_add(&sb, str, strlen(str) + 1) < 0) + goto error; free(str); } ph->env.numa_nodes = strbuf_detach(&sb, NULL); @@ -1987,7 +1988,8 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused } ph->env.nr_pmu_mappings = pmu_num; - strbuf_init(&sb, 128); + if (strbuf_init(&sb, 128) < 0) + return -1; while (pmu_num) { if (readn(fd, &type, sizeof(type)) != sizeof(type)) @@ -1999,9 +2001,11 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused if (!name) goto error; - strbuf_addf(&sb, "%u:%s", type, name); + if (strbuf_addf(&sb, "%u:%s", type, name) < 0) + goto error; /* include a NULL character at the end */ - strbuf_add(&sb, "", 1); + if (strbuf_add(&sb, "", 1) < 0) + goto error; if (!strcmp(name, "msr")) ph->env.msr_pmu_type = type; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 3d87ca823c0a..d306ca118449 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -121,7 +121,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, perf_event__handler_t process); int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); -int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, +int perf_event__process_event_update(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 43a98a4dc1e1..d62ccaeeadd6 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -27,16 +27,27 @@ static int levenshtein_compare(const void *p1, const void *p2) return l1 != l2 ? l1 - l2 : strcmp(s1, s2); } -static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) +static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) { - unsigned int i; - - ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); - + unsigned int i, nr = cmds->cnt + old->cnt; + void *tmp; + + if (nr > cmds->alloc) { + /* Choose bigger one to alloc */ + if (alloc_nr(cmds->alloc) < nr) + cmds->alloc = nr; + else + cmds->alloc = alloc_nr(cmds->alloc); + tmp = realloc(cmds->names, cmds->alloc * sizeof(*cmds->names)); + if (!tmp) + return -1; + cmds->names = tmp; + } for (i = 0; i < old->cnt; i++) cmds->names[cmds->cnt++] = old->names[i]; zfree(&old->names); old->cnt = 0; + return 0; } const char *help_unknown_cmd(const char *cmd) @@ -52,8 +63,11 @@ const char *help_unknown_cmd(const char *cmd) load_command_list("perf-", &main_cmds, &other_cmds); - add_cmd_list(&main_cmds, &aliases); - add_cmd_list(&main_cmds, &other_cmds); + if (add_cmd_list(&main_cmds, &aliases) < 0 || + add_cmd_list(&main_cmds, &other_cmds) < 0) { + fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n"); + goto end; + } qsort(main_cmds.names, main_cmds.cnt, sizeof(main_cmds.names), cmdname_compare); uniq(&main_cmds); @@ -99,6 +113,6 @@ const char *help_unknown_cmd(const char *cmd) for (i = 0; i < n; i++) fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); } - +end: exit(1); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 290b3cbf6877..cfab531437c7 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -295,7 +295,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) root_in = &he->parent_he->hroot_in; root_out = &he->parent_he->hroot_out; } else { - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; @@ -670,7 +670,7 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al } static int -iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, +iter_add_single_branch_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { /* to avoid calling callback function */ @@ -953,7 +953,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, { int err, err2; - err = sample__resolve_callchain(iter->sample, &iter->parent, + err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); if (err) return err; @@ -1295,8 +1295,9 @@ static int hists__hierarchy_insert_entry(struct hists *hists, return ret; } -int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, - struct hist_entry *he) +static int hists__collapse_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -1372,7 +1373,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) struct hist_entry *n; int ret; - if (!sort__need_collapse) + if (!hists__has(hists, need_collapse)) return 0; hists->nr_entries = 0; @@ -1631,7 +1632,7 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, return; } - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -2035,7 +2036,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -2061,6 +2062,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, if (he) { memset(&he->stat, 0, sizeof(he->stat)); he->hists = hists; + if (symbol_conf.cumulate_callchain) + memset(he->stat_acc, 0, sizeof(he->stat)); rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); hists__inc_stats(hists, he); @@ -2075,7 +2078,7 @@ static struct hist_entry *hists__find_entry(struct hists *hists, { struct rb_node *n; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) n = hists->entries_collapsed.rb_node; else n = hists->entries_in->rb_node; @@ -2104,7 +2107,7 @@ void hists__match(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; - if (sort__need_collapse) + if (hists__has(leader, need_collapse)) root = &leader->entries_collapsed; else root = leader->entries_in; @@ -2129,7 +2132,7 @@ int hists__link(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; - if (sort__need_collapse) + if (hists__has(other, need_collapse)) root = &other->entries_collapsed; else root = other->entries_in; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ead18c82294f..0f84bfb42bb1 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -82,6 +82,8 @@ struct hists { int nr_hpp_node; }; +#define hists__has(__h, __f) (__h)->hpp_list->__f + struct hist_entry_iter; struct hist_iter_ops { @@ -199,8 +201,6 @@ int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); struct rb_root *hists__get_rotate_entries_in(struct hists *hists); -int hists__collapse_insert_entry(struct hists *hists, - struct rb_root *root, struct hist_entry *he); struct perf_hpp { char *buf; @@ -240,6 +240,14 @@ struct perf_hpp_fmt { struct perf_hpp_list { struct list_head fields; struct list_head sorts; + + int need_collapse; + int parent; + int sym; + int dso; + int socket; + int thread; + int comm; }; extern struct perf_hpp_list perf_hpp_list; @@ -433,8 +441,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode); struct option; -int parse_filter_percentage(const struct option *opt __maybe_unused, - const char *arg, int unset __maybe_unused); +int parse_filter_percentage(const struct option *opt, const char *arg, int unset); int perf_hist_config(const char *var, const char *value); void perf_hpp_list__init(struct perf_hpp_list *list); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index eb0e7f8bf515..9df996085563 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -66,6 +66,7 @@ struct intel_bts { u64 branches_id; size_t branches_event_size; bool synth_needs_swap; + unsigned long num_events; }; struct intel_bts_queue { @@ -275,10 +276,15 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, union perf_event event; struct perf_sample sample = { .ip = 0, }; + if (bts->synth_opts.initial_skip && + bts->num_events++ <= bts->synth_opts.initial_skip) + return 0; + event.sample.header.type = PERF_RECORD_SAMPLE; event.sample.header.misc = PERF_RECORD_MISC_USER; event.sample.header.size = sizeof(struct perf_event_header); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = le64_to_cpu(branch->from); sample.pid = btsq->pid; sample.tid = btsq->tid; @@ -678,7 +684,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session, return 0; } -static int intel_bts_flush(struct perf_session *session __maybe_unused, +static int intel_bts_flush(struct perf_session *session, struct perf_tool *tool __maybe_unused) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c index 47314a64399c..9f26eae6c9f0 100644 --- a/tools/perf/util/intel-pt-decoder/insn.c +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -374,7 +374,7 @@ void insn_get_displacement(struct insn *insn) if (mod == 3) goto out; if (mod == 1) { - insn->displacement.value = get_next(char, insn); + insn->displacement.value = get_next(signed char, insn); insn->displacement.nbytes = 1; } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { @@ -532,7 +532,7 @@ void insn_get_immediate(struct insn *insn) switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: - insn->immediate.value = get_next(char, insn); + insn->immediate.value = get_next(signed char, insn); insn->immediate.nbytes = 1; break; case INAT_IMM_WORD: @@ -566,7 +566,7 @@ void insn_get_immediate(struct insn *insn) goto err_out; } if (inat_has_second_immediate(insn->attr)) { - insn->immediate2.value = get_next(char, insn); + insn->immediate2.value = get_next(signed char, insn); insn->immediate2.nbytes = 1; } done: diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9409d014b46c..9c8f15da86ce 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -356,7 +356,7 @@ static const char *intel_pt_err_msgs[] = { int intel_pt__strerror(int code, char *buf, size_t buflen) { - if (code < 1 || code > INTEL_PT_ERR_MAX) + if (code < 1 || code >= INTEL_PT_ERR_MAX) code = INTEL_PT_ERR_UNK; strlcpy(buf, intel_pt_err_msgs[code], buflen); return 0; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 05d815851be1..137196990012 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -100,6 +100,8 @@ struct intel_pt { u64 cyc_bit; u64 noretcomp_bit; unsigned max_non_turbo_ratio; + + unsigned long num_events; }; enum switch_state { @@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -979,6 +985,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (!pt->timeless_decoding) sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = ptq->state->from_ip; sample.pid = ptq->pid; sample.tid = ptq->tid; @@ -1028,6 +1035,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1035,6 +1046,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) if (!pt->timeless_decoding) sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = ptq->state->from_ip; sample.pid = ptq->pid; sample.tid = ptq->tid; @@ -1085,6 +1097,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1092,6 +1108,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) if (!pt->timeless_decoding) sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = ptq->state->from_ip; sample.pid = ptq->pid; sample.tid = ptq->tid; @@ -1127,7 +1144,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", ret); - if (pt->synth_opts.callchain) + if (pt->synth_opts.last_branch) intel_pt_reset_last_branch_rb(ptq); return ret; @@ -1196,14 +1213,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; if (pt->sample_instructions && - (state->type & INTEL_PT_INSTRUCTION)) { + (state->type & INTEL_PT_INSTRUCTION) && + (!pt->synth_opts.initial_skip || + pt->num_events++ >= pt->synth_opts.initial_skip)) { err = intel_pt_synth_instruction_sample(ptq); if (err) return err; } if (pt->sample_transactions && - (state->type & INTEL_PT_TRANSACTION)) { + (state->type & INTEL_PT_TRANSACTION) && + (!pt->synth_opts.initial_skip || + pt->num_events++ >= pt->synth_opts.initial_skip)) { err = intel_pt_synth_transaction_sample(ptq); if (err) return err; diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h index a1e99da0715a..3f42ee4d2a0b 100644 --- a/tools/perf/util/jit.h +++ b/tools/perf/util/jit.h @@ -3,13 +3,9 @@ #include <data.h> -extern int jit_process(struct perf_session *session, - struct perf_data_file *output, - struct machine *machine, - char *filename, - pid_t pid, - u64 *nbytes); - -extern int jit_inject_record(const char *filename); +int jit_process(struct perf_session *session, struct perf_data_file *output, + struct machine *machine, char *filename, pid_t pid, u64 *nbytes); + +int jit_inject_record(const char *filename); #endif /* __JIT_H__ */ diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index cd272cc21e05..86afe9618bb0 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -17,6 +17,7 @@ #include "strlist.h" #include <elf.h> +#include "tsc.h" #include "session.h" #include "jit.h" #include "jitdump.h" @@ -33,6 +34,7 @@ struct jit_buf_desc { size_t bufsize; FILE *in; bool needs_bswap; /* handles cross-endianess */ + bool use_arch_timestamp; void *debug_data; size_t nr_debug_entries; uint32_t code_load_count; @@ -158,13 +160,16 @@ jit_open(struct jit_buf_desc *jd, const char *name) header.flags = bswap_64(header.flags); } + jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP; + if (verbose > 2) - pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n", + pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n", header.version, header.total_size, (unsigned long long)header.timestamp, header.pid, - header.elf_mach); + header.elf_mach, + jd->use_arch_timestamp); if (header.flags & JITDUMP_FLAGS_RESERVED) { pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", @@ -172,10 +177,15 @@ jit_open(struct jit_buf_desc *jd, const char *name) goto error; } + if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) { + pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n"); + goto error; + } + /* * validate event is using the correct clockid */ - if (jit_validate_events(jd->session)) { + if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) { pr_err("error, jitted code must be sampled with perf record -k 1\n"); goto error; } @@ -329,6 +339,23 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) return 0; } +static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) +{ + struct perf_tsc_conversion tc; + + if (!jd->use_arch_timestamp) + return timestamp; + + tc.time_shift = jd->session->time_conv.time_shift; + tc.time_mult = jd->session->time_conv.time_mult; + tc.time_zero = jd->session->time_conv.time_zero; + + if (!tc.time_mult) + return 0; + + return tsc_to_perf_time(timestamp, &tc); +} + static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; @@ -385,7 +412,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) return -1; } if (stat(filename, &st)) - memset(&st, 0, sizeof(stat)); + memset(&st, 0, sizeof(st)); event->mmap2.header.type = PERF_RECORD_MMAP2; event->mmap2.header.misc = PERF_RECORD_MISC_USER; @@ -410,13 +437,14 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) - id->time = jr->load.p.timestamp; + id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment * use first address as sample address */ memset(&sample, 0, sizeof(sample)); + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; sample.time = id->time; @@ -472,7 +500,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) size++; /* for \0 */ if (stat(filename, &st)) - memset(&st, 0, sizeof(stat)); + memset(&st, 0, sizeof(st)); size = PERF_ALIGN(size, sizeof(u64)); @@ -498,13 +526,14 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) - id->time = jr->load.p.timestamp; + id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment * use first address as sample address */ memset(&sample, 0, sizeof(sample)); + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; sample.time = id->time; diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h index b66c1f503d9e..bcacd20d0c1c 100644 --- a/tools/perf/util/jitdump.h +++ b/tools/perf/util/jitdump.h @@ -23,9 +23,12 @@ #define JITHEADER_VERSION 1 enum jitdump_flags_bits { + JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT, JITDUMP_FLAGS_MAX_BIT, }; +#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT) + #define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \ (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 00724d496d38..33071d6159bc 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -3,11 +3,11 @@ * Copyright (C) 2015, Huawei Inc. */ +#include <limits.h> #include <stdio.h> -#include "util.h" +#include <stdlib.h> #include "debug.h" #include "llvm-utils.h" -#include "cache.h" #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ @@ -98,11 +98,12 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) void *buf = NULL; FILE *file = NULL; size_t read_sz = 0, buf_sz = 0; + char serr[STRERR_BUFSIZE]; file = popen(cmd, "r"); if (!file) { pr_err("ERROR: unable to popen cmd: %s\n", - strerror(errno)); + strerror_r(errno, serr, sizeof(serr))); return -EINVAL; } @@ -136,7 +137,7 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) if (ferror(file)) { pr_err("ERROR: error occurred when reading from pipe: %s\n", - strerror(errno)); + strerror_r(errno, serr, sizeof(serr))); err = -EIO; goto errout; } @@ -334,10 +335,18 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, unsigned int kernel_version; char linux_version_code_str[64]; const char *clang_opt = llvm_param.clang_opt; - char clang_path[PATH_MAX], nr_cpus_avail_str[64]; + char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; + char serr[STRERR_BUFSIZE]; char *kbuild_dir = NULL, *kbuild_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; + if (path[0] != '-' && realpath(path, abspath) == NULL) { + err = errno; + pr_err("ERROR: problems with path %s: %s\n", + path, strerror_r(err, serr, sizeof(serr))); + return -err; + } + if (!template) template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; @@ -362,7 +371,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, if (nr_cpus_avail <= 0) { pr_err( "WARNING:\tunable to get available CPUs in this system: %s\n" -" \tUse 128 instead.\n", strerror(errno)); +" \tUse 128 instead.\n", strerror_r(errno, serr, sizeof(serr))); nr_cpus_avail = 128; } snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", @@ -387,8 +396,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, * stdin to be source file (testing). */ force_set_env("CLANG_SOURCE", - (path[0] == '-') ? path : - make_nonrelative_path(path)); + (path[0] == '-') ? path : abspath); pr_debug("llvm compiling command template: %s\n", template); err = read_from_pipe(template, &obj_buf, &obj_buf_sz); diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h index 5b3cf1c229e2..23b9a743fe72 100644 --- a/tools/perf/util/llvm-utils.h +++ b/tools/perf/util/llvm-utils.h @@ -39,11 +39,10 @@ struct llvm_param { }; extern struct llvm_param llvm_param; -extern int perf_llvm_config(const char *var, const char *value); +int perf_llvm_config(const char *var, const char *value); -extern int llvm__compile_bpf(const char *path, void **p_obj_buf, - size_t *p_obj_buf_sz); +int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz); /* This function is for test__llvm() use only */ -extern int llvm__search_clang(void); +int llvm__search_clang(void); #endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ad79297c76c8..f9644f79686c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -32,6 +32,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->threads = RB_ROOT; pthread_rwlock_init(&machine->threads_lock, NULL); + machine->nr_threads = 0; INIT_LIST_HEAD(&machine->dead_threads); machine->last_match = NULL; @@ -361,7 +362,7 @@ out_err: } /* - * Caller must eventually drop thread->refcnt returned with a successfull + * Caller must eventually drop thread->refcnt returned with a successful * lookup/new thread inserted. */ static struct thread *____machine__findnew_thread(struct machine *machine, @@ -430,6 +431,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, */ thread__get(th); machine->last_match = th; + ++machine->nr_threads; } return th; @@ -681,11 +683,13 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret = 0; + size_t ret; struct rb_node *nd; pthread_rwlock_rdlock(&machine->threads_lock); + ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); @@ -908,11 +912,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, symbol_filter_t filter) +int __machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type, bool no_kcore, symbol_filter_t filter) { struct map *map = machine__kernel_map(machine); - int ret = dso__load_kallsyms(map->dso, filename, map, filter); + int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter); if (ret > 0) { dso__set_loaded(map->dso, type); @@ -927,6 +931,12 @@ int machine__load_kallsyms(struct machine *machine, const char *filename, return ret; } +int machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type, symbol_filter_t filter) +{ + return __machine__load_kallsyms(machine, filename, type, false, filter); +} + int machine__load_vmlinux_path(struct machine *machine, enum map_type type, symbol_filter_t filter) { @@ -1301,9 +1311,8 @@ out_problem: int machine__process_mmap2_event(struct machine *machine, union perf_event *event, - struct perf_sample *sample __maybe_unused) + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; struct map *map; enum map_type type; @@ -1312,8 +1321,8 @@ int machine__process_mmap2_event(struct machine *machine, if (dump_trace) perf_event__fprintf_mmap2(event, stdout); - if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || - cpumode == PERF_RECORD_MISC_KERNEL) { + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + sample->cpumode == PERF_RECORD_MISC_KERNEL) { ret = machine__process_kernel_mmap_event(machine, event); if (ret < 0) goto out_problem; @@ -1355,9 +1364,8 @@ out_problem: } int machine__process_mmap_event(struct machine *machine, union perf_event *event, - struct perf_sample *sample __maybe_unused) + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; struct map *map; enum map_type type; @@ -1366,8 +1374,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_mmap(event, stdout); - if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || - cpumode == PERF_RECORD_MISC_KERNEL) { + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + sample->cpumode == PERF_RECORD_MISC_KERNEL) { ret = machine__process_kernel_mmap_event(machine, event); if (ret < 0) goto out_problem; @@ -1415,6 +1423,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, pthread_rwlock_wrlock(&machine->threads_lock); rb_erase_init(&th->rb_node, &machine->threads); RB_CLEAR_NODE(&th->rb_node); + --machine->nr_threads; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor @@ -1601,6 +1610,7 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, } static int add_callchain_ip(struct thread *thread, + struct callchain_cursor *cursor, struct symbol **parent, struct addr_location *root_al, u8 *cpumode, @@ -1632,7 +1642,7 @@ static int add_callchain_ip(struct thread *thread, * It seems the callchain is corrupted. * Discard all. */ - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); return 1; } return 0; @@ -1642,7 +1652,7 @@ static int add_callchain_ip(struct thread *thread, } if (al.sym != NULL) { - if (sort__has_parent && !*parent && + if (perf_hpp_list.parent && !*parent && symbol__match_regex(al.sym, &parent_regex)) *parent = al.sym; else if (have_ignore_callees && root_al && @@ -1650,13 +1660,13 @@ static int add_callchain_ip(struct thread *thread, /* Treat this symbol as the root, forgetting its callees. */ *root_al = al; - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); } } if (symbol_conf.hide_unresolved && al.sym == NULL) return 0; - return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym); + return callchain_cursor_append(cursor, al.addr, al.map, al.sym); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -1726,6 +1736,7 @@ static int remove_loops(struct branch_entry *l, int nr) * negative error code on other errors. */ static int resolve_lbr_callchain_sample(struct thread *thread, + struct callchain_cursor *cursor, struct perf_sample *sample, struct symbol **parent, struct addr_location *root_al, @@ -1758,7 +1769,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, */ int mix_chain_nr = i + 1 + lbr_nr + 1; - if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { + if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) { pr_warning("corrupted callchain. skipping...\n"); return 0; } @@ -1780,7 +1791,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = lbr_stack->entries[0].to; } - err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); if (err) return (err < 0) ? err : 0; } @@ -1791,6 +1802,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, } static int thread__resolve_callchain_sample(struct thread *thread, + struct callchain_cursor *cursor, struct perf_evsel *evsel, struct perf_sample *sample, struct symbol **parent, @@ -1805,10 +1817,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, int skip_idx = -1; int first_call = 0; - callchain_cursor_reset(&callchain_cursor); - - if (has_branch_callstack(evsel)) { - err = resolve_lbr_callchain_sample(thread, sample, parent, + if (perf_evsel__has_branch_callstack(evsel)) { + err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, root_al, max_stack); if (err) return (err < 0) ? err : 0; @@ -1818,7 +1828,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - if (chain->nr < PERF_MAX_STACK_DEPTH) + if (chain->nr < sysctl_perf_event_max_stack) skip_idx = arch_skip_callchain_idx(thread, chain); /* @@ -1865,10 +1875,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, nr = remove_loops(be, nr); for (i = 0; i < nr; i++) { - err = add_callchain_ip(thread, parent, root_al, + err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].to); if (!err) - err = add_callchain_ip(thread, parent, root_al, + err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].from); if (err == -EINVAL) break; @@ -1879,7 +1889,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) { + if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) { pr_warning("corrupted callchain. skipping...\n"); return 0; } @@ -1898,7 +1908,7 @@ check_calls: #endif ip = chain->ips[j]; - err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); if (err) return (err < 0) ? err : 0; @@ -1917,19 +1927,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) entry->map, entry->sym); } -int thread__resolve_callchain(struct thread *thread, - struct perf_evsel *evsel, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +static int thread__resolve_callchain_unwind(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_evsel *evsel, + struct perf_sample *sample, + int max_stack) { - int ret = thread__resolve_callchain_sample(thread, evsel, - sample, parent, - root_al, max_stack); - if (ret) - return ret; - /* Can we do dwarf post unwind? */ if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) && (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) @@ -1940,9 +1943,45 @@ int thread__resolve_callchain(struct thread *thread, (!sample->user_stack.size)) return 0; - return unwind__get_entries(unwind_entry, &callchain_cursor, + return unwind__get_entries(unwind_entry, cursor, thread, sample, max_stack); +} +int thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + int ret = 0; + + callchain_cursor_reset(&callchain_cursor); + + if (callchain_param.order == ORDER_CALLEE) { + ret = thread__resolve_callchain_sample(thread, cursor, + evsel, sample, + parent, root_al, + max_stack); + if (ret) + return ret; + ret = thread__resolve_callchain_unwind(thread, cursor, + evsel, sample, + max_stack); + } else { + ret = thread__resolve_callchain_unwind(thread, cursor, + evsel, sample, + max_stack); + if (ret) + return ret; + ret = thread__resolve_callchain_sample(thread, cursor, + evsel, sample, + parent, root_al, + max_stack); + } + + return ret; } int machine__for_each_thread(struct machine *machine, diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 1a3e45baf97f..83f46790c52f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -31,6 +31,7 @@ struct machine { char *root_dir; struct rb_root threads; pthread_rwlock_t threads_lock; + unsigned int nr_threads; struct list_head dead_threads; struct thread *last_match; struct vdso_info *vdso_info; @@ -94,7 +95,7 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); -int machine__process_switch_event(struct machine *machine __maybe_unused, +int machine__process_switch_event(struct machine *machine, union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); @@ -141,7 +142,11 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct addr_location *al); struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct addr_location *al); + +struct callchain_cursor; + int thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, struct perf_evsel *evsel, struct perf_sample *sample, struct symbol **parent, @@ -211,6 +216,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine, struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); +int __machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type, bool no_kcore, symbol_filter_t filter); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, symbol_filter_t filter); int machine__load_vmlinux_path(struct machine *machine, enum map_type type, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 171b6d10a04b..b19bcd3b7128 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -289,7 +289,7 @@ int map__load(struct map *map, symbol_filter_t filter) nr = dso__load(map->dso, map, filter); if (nr < 0) { if (map->dso->has_build_id) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(map->dso->build_id, sizeof(map->dso->build_id), @@ -431,6 +431,13 @@ u64 map__rip_2objdump(struct map *map, u64 rip) if (map->dso->rel) return rip - map->pgoff; + /* + * kernel modules also have DSO_TYPE_USER in dso->kernel, + * but all kernel modules are ET_REL, so won't get here. + */ + if (map->dso->kernel == DSO_TYPE_USER) + return rip + map->dso->text_offset; + return map->unmap_ip(map, rip) - map->reloc; } @@ -454,6 +461,13 @@ u64 map__objdump_2mem(struct map *map, u64 ip) if (map->dso->rel) return map->unmap_ip(map, ip + map->pgoff); + /* + * kernel modules also have DSO_TYPE_USER in dso->kernel, + * but all kernel modules are ET_REL, so won't get here. + */ + if (map->dso->kernel == DSO_TYPE_USER) + return map->unmap_ip(map, ip - map->dso->text_offset); + return ip + map->reloc; } diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index b1b9e2385f4b..fe84df1875aa 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -308,3 +308,12 @@ void ordered_events__free(struct ordered_events *oe) free(event); } } + +void ordered_events__reinit(struct ordered_events *oe) +{ + ordered_events__deliver_t old_deliver = oe->deliver; + + ordered_events__free(oe); + memset(oe, '\0', sizeof(*oe)); + ordered_events__init(oe, old_deliver); +} diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index f403991e3bfd..e11468a9a6e4 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -49,6 +49,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); void ordered_events__free(struct ordered_events *oe); +void ordered_events__reinit(struct ordered_events *oe); static inline void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4c19d5e79d8c..c6fd0479f4cd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -138,11 +138,11 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) -#define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ - while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ - if (sys_dirent.d_type == DT_DIR && \ - (strcmp(sys_dirent.d_name, ".")) && \ - (strcmp(sys_dirent.d_name, ".."))) +#define for_each_subsystem(sys_dir, sys_dirent) \ + while ((sys_dirent = readdir(sys_dir)) != NULL) \ + if (sys_dirent->d_type == DT_DIR && \ + (strcmp(sys_dirent->d_name, ".")) && \ + (strcmp(sys_dirent->d_name, ".."))) static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) { @@ -159,12 +159,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) return 0; } -#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) \ - while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ - if (evt_dirent.d_type == DT_DIR && \ - (strcmp(evt_dirent.d_name, ".")) && \ - (strcmp(evt_dirent.d_name, "..")) && \ - (!tp_event_has_id(&sys_dirent, &evt_dirent))) +#define for_each_event(sys_dirent, evt_dir, evt_dirent) \ + while ((evt_dirent = readdir(evt_dir)) != NULL) \ + if (evt_dirent->d_type == DT_DIR && \ + (strcmp(evt_dirent->d_name, ".")) && \ + (strcmp(evt_dirent->d_name, "..")) && \ + (!tp_event_has_id(sys_dirent, evt_dirent))) #define MAX_EVENT_LENGTH 512 @@ -173,7 +173,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) { struct tracepoint_path *path = NULL; DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char id_buf[24]; int fd; u64 id; @@ -184,18 +184,18 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) if (!sys_dir) return NULL; - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, - evt_dirent.d_name); + evt_dirent->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) continue; @@ -220,9 +220,9 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) free(path); return NULL; } - strncpy(path->system, sys_dirent.d_name, + strncpy(path->system, sys_dirent->d_name, MAX_EVENT_LENGTH); - strncpy(path->name, evt_dirent.d_name, + strncpy(path->name, evt_dirent->d_name, MAX_EVENT_LENGTH); return path; } @@ -1649,7 +1649,7 @@ static void parse_events_print_error(struct parse_events_error *err, buf = _buf; - /* We're cutting from the beggining. */ + /* We're cutting from the beginning. */ if (err->idx > max_err_idx) cut = err->idx - max_err_idx; @@ -1812,7 +1812,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only) { DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; char **evt_list = NULL; @@ -1830,20 +1830,20 @@ restart: goto out_close_sys_dir; } - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { if (subsys_glob != NULL && - !strglobmatch(sys_dirent.d_name, subsys_glob)) + !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { if (event_glob != NULL && - !strglobmatch(evt_dirent.d_name, event_glob)) + !strglobmatch(evt_dirent->d_name, event_glob)) continue; if (!evt_num_known) { @@ -1852,7 +1852,7 @@ restart: } snprintf(evt_path, MAXPATHLEN, "%s:%s", - sys_dirent.d_name, evt_dirent.d_name); + sys_dirent->d_name, evt_dirent->d_name); evt_list[evt_i] = strdup(evt_path); if (evt_list[evt_i] == NULL) @@ -1905,7 +1905,7 @@ out_close_sys_dir: int is_valid_tracepoint(const char *event_string) { DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; @@ -1913,17 +1913,17 @@ int is_valid_tracepoint(const char *event_string) if (!sys_dir) return 0; - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s:%s", - sys_dirent.d_name, evt_dirent.d_name); + sys_dirent->d_name, evt_dirent->d_name); if (!strcmp(evt_path, event_string)) { closedir(evt_dir); closedir(sys_dir); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 67e493088e81..d740c3ca9a1d 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -22,19 +22,18 @@ struct tracepoint_path { struct tracepoint_path *next; }; -extern struct tracepoint_path *tracepoint_id_to_path(u64 config); -extern struct tracepoint_path *tracepoint_name_to_path(const char *name); -extern bool have_tracepoints(struct list_head *evlist); +struct tracepoint_path *tracepoint_id_to_path(u64 config); +struct tracepoint_path *tracepoint_name_to_path(const char *name); +bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); -extern int parse_events_option(const struct option *opt, const char *str, - int unset); -extern int parse_events(struct perf_evlist *evlist, const char *str, - struct parse_events_error *error); -extern int parse_events_terms(struct list_head *terms, const char *str); -extern int parse_filter(const struct option *opt, const char *str, int unset); -extern int exclude_perf(const struct option *opt, const char *arg, int unset); +int parse_events_option(const struct option *opt, const char *str, int unset); +int parse_events(struct perf_evlist *evlist, const char *str, + struct parse_events_error *error); +int parse_events_terms(struct list_head *terms, const char *str); +int parse_filter(const struct option *opt, const char *str, int unset); +int exclude_perf(const struct option *opt, const char *arg, int unset); #define EVENTS_HELP_MAX (128*1024) @@ -183,7 +182,7 @@ void print_symbol_events(const char *event_glob, unsigned type, void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); -extern int is_valid_tracepoint(const char *event_string); +int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 3654d964e49d..3bf6bf82ff2d 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -41,36 +41,6 @@ static char *cleanup_path(char *path) return path; } -static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) -{ - const char *perf_dir = get_perf_dir(); - size_t len; - - len = strlen(perf_dir); - if (n < len + 1) - goto bad; - memcpy(buf, perf_dir, len); - if (len && !is_dir_sep(perf_dir[len-1])) - buf[len++] = '/'; - len += vsnprintf(buf + len, n - len, fmt, args); - if (len >= n) - goto bad; - return cleanup_path(buf); -bad: - strlcpy(buf, bad_path, n); - return buf; -} - -char *perf_pathdup(const char *fmt, ...) -{ - char path[PATH_MAX]; - va_list args; - va_start(args, fmt); - (void)perf_vsnpath(path, sizeof(path), fmt, args); - va_end(args); - return xstrdup(path); -} - char *mkpath(const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 6b8eb13e14e4..c4023f22f287 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -12,18 +12,18 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) int i, idx = 0; u64 mask = regs->mask; - if (regs->cache_mask & (1 << id)) + if (regs->cache_mask & (1ULL << id)) goto out; - if (!(mask & (1 << id))) + if (!(mask & (1ULL << id))) return -EINVAL; for (i = 0; i < id; i++) { - if (mask & (1 << i)) + if (mask & (1ULL << i)) idx++; } - regs->cache_mask |= (1 << id); + regs->cache_mask |= (1ULL << id); regs->cache_regs[id] = regs->regs[idx]; out: diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index adef23b1352e..ddb0261b2577 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -602,14 +602,13 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - int w; + __u64 w = 0; + int fbit; - w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); - if (!w) - return 0; - if (w < 64) - return (1ULL << w) - 1; - return -1; + for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) + w |= (1ULL << fbit); + + return w; } /* @@ -644,20 +643,20 @@ static int pmu_resolve_param_term(struct parse_events_term *term, static char *pmu_formats_string(struct list_head *formats) { struct perf_pmu_format *format; - char *str; - struct strbuf buf; + char *str = NULL; + struct strbuf buf = STRBUF_INIT; unsigned i = 0; if (!formats) return NULL; - strbuf_init(&buf, 0); /* sysfs exported terms */ list_for_each_entry(format, formats, list) - strbuf_addf(&buf, i++ ? ",%s" : "%s", - format->name); + if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0) + goto error; str = strbuf_detach(&buf, NULL); +error: strbuf_release(&buf); return str; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 93996ec4bbe3..74401a20106d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -265,6 +265,65 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address) return true; } +/* + * NOTE: + * '.gnu.linkonce.this_module' section of kernel module elf directly + * maps to 'struct module' from linux/module.h. This section contains + * actual module name which will be used by kernel after loading it. + * But, we cannot use 'struct module' here since linux/module.h is not + * exposed to user-space. Offset of 'name' has remained same from long + * time, so hardcoding it here. + */ +#ifdef __LP64__ +#define MOD_NAME_OFFSET 24 +#else +#define MOD_NAME_OFFSET 12 +#endif + +/* + * @module can be module name of module file path. In case of path, + * inspect elf and find out what is actual module name. + * Caller has to free mod_name after using it. + */ +static char *find_module_name(const char *module) +{ + int fd; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + char *mod_name = NULL; + + fd = open(module, O_RDONLY); + if (fd < 0) + return NULL; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + goto elf_err; + + if (gelf_getehdr(elf, &ehdr) == NULL) + goto ret_err; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.linkonce.this_module", NULL); + if (!sec) + goto ret_err; + + data = elf_getdata(sec, NULL); + if (!data || !data->d_buf) + goto ret_err; + + mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET); + +ret_err: + elf_end(elf); +elf_err: + close(fd); + return mod_name; +} + #ifdef HAVE_DWARF_SUPPORT static int kernel_get_module_dso(const char *module, struct dso **pdso) @@ -486,8 +545,10 @@ static int get_text_start_address(const char *exec, unsigned long *address) return -errno; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - return -EINVAL; + if (elf == NULL) { + ret = -EINVAL; + goto out_close; + } if (gelf_getehdr(elf, &ehdr) == NULL) goto out; @@ -499,6 +560,9 @@ static int get_text_start_address(const char *exec, unsigned long *address) ret = 0; out: elf_end(elf); +out_close: + close(fd); + return ret; } @@ -583,32 +647,23 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *module) { int i, ret = 0; - char *tmp; + char *mod_name = NULL; if (!module) return 0; - tmp = strrchr(module, '/'); - if (tmp) { - /* This is a module path -- get the module name */ - module = strdup(tmp + 1); - if (!module) - return -ENOMEM; - tmp = strchr(module, '.'); - if (tmp) - *tmp = '\0'; - tmp = (char *)module; /* For free() */ - } + mod_name = find_module_name(module); for (i = 0; i < ntevs; i++) { - tevs[i].point.module = strdup(module); + tevs[i].point.module = + strdup(mod_name ? mod_name : module); if (!tevs[i].point.module) { ret = -ENOMEM; break; } } - free(tmp); + free(mod_name); return ret; } @@ -1618,69 +1673,65 @@ out: } /* Compose only probe arg */ -int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len) +char *synthesize_perf_probe_arg(struct perf_probe_arg *pa) { struct perf_probe_arg_field *field = pa->field; - int ret; - char *tmp = buf; + struct strbuf buf; + char *ret = NULL; + int err; + + if (strbuf_init(&buf, 64) < 0) + return NULL; if (pa->name && pa->var) - ret = e_snprintf(tmp, len, "%s=%s", pa->name, pa->var); + err = strbuf_addf(&buf, "%s=%s", pa->name, pa->var); else - ret = e_snprintf(tmp, len, "%s", pa->name ? pa->name : pa->var); - if (ret <= 0) - goto error; - tmp += ret; - len -= ret; + err = strbuf_addstr(&buf, pa->name ?: pa->var); + if (err) + goto out; while (field) { if (field->name[0] == '[') - ret = e_snprintf(tmp, len, "%s", field->name); + err = strbuf_addstr(&buf, field->name); else - ret = e_snprintf(tmp, len, "%s%s", - field->ref ? "->" : ".", field->name); - if (ret <= 0) - goto error; - tmp += ret; - len -= ret; + err = strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".", + field->name); field = field->next; + if (err) + goto out; } - if (pa->type) { - ret = e_snprintf(tmp, len, ":%s", pa->type); - if (ret <= 0) - goto error; - tmp += ret; - len -= ret; - } + if (pa->type) + if (strbuf_addf(&buf, ":%s", pa->type) < 0) + goto out; - return tmp - buf; -error: - pr_debug("Failed to synthesize perf probe argument: %d\n", ret); + ret = strbuf_detach(&buf, NULL); +out: + strbuf_release(&buf); return ret; } /* Compose only probe point (not argument) */ static char *synthesize_perf_probe_point(struct perf_probe_point *pp) { - char *buf, *tmp; - char offs[32] = "", line[32] = "", file[32] = ""; - int ret, len; + struct strbuf buf; + char *tmp, *ret = NULL; + int len, err = 0; - buf = zalloc(MAX_CMDLEN); - if (buf == NULL) { - ret = -ENOMEM; - goto error; - } - if (pp->offset) { - ret = e_snprintf(offs, 32, "+%lu", pp->offset); - if (ret <= 0) - goto error; - } - if (pp->line) { - ret = e_snprintf(line, 32, ":%d", pp->line); - if (ret <= 0) - goto error; + if (strbuf_init(&buf, 64) < 0) + return NULL; + + if (pp->function) { + if (strbuf_addstr(&buf, pp->function) < 0) + goto out; + if (pp->offset) + err = strbuf_addf(&buf, "+%lu", pp->offset); + else if (pp->line) + err = strbuf_addf(&buf, ":%d", pp->line); + else if (pp->retprobe) + err = strbuf_addstr(&buf, "%return"); + if (err) + goto out; } if (pp->file) { tmp = pp->file; @@ -1689,25 +1740,15 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp) tmp = strchr(pp->file + len - 30, '/'); tmp = tmp ? tmp + 1 : pp->file + len - 30; } - ret = e_snprintf(file, 32, "@%s", tmp); - if (ret <= 0) - goto error; + err = strbuf_addf(&buf, "@%s", tmp); + if (!err && !pp->function && pp->line) + err = strbuf_addf(&buf, ":%d", pp->line); } - - if (pp->function) - ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function, - offs, pp->retprobe ? "%return" : "", line, - file); - else - ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line); - if (ret <= 0) - goto error; - - return buf; -error: - pr_debug("Failed to synthesize perf probe point: %d\n", ret); - free(buf); - return NULL; + if (!err) + ret = strbuf_detach(&buf, NULL); +out: + strbuf_release(&buf); + return ret; } #if 0 @@ -1736,45 +1777,32 @@ char *synthesize_perf_probe_command(struct perf_probe_event *pev) #endif static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref, - char **buf, size_t *buflen, - int depth) + struct strbuf *buf, int depth) { - int ret; + int err; if (ref->next) { depth = __synthesize_probe_trace_arg_ref(ref->next, buf, - buflen, depth + 1); + depth + 1); if (depth < 0) - goto out; - } - - ret = e_snprintf(*buf, *buflen, "%+ld(", ref->offset); - if (ret < 0) - depth = ret; - else { - *buf += ret; - *buflen -= ret; + return depth; } -out: - return depth; - + err = strbuf_addf(buf, "%+ld(", ref->offset); + return (err < 0) ? err : depth; } static int synthesize_probe_trace_arg(struct probe_trace_arg *arg, - char *buf, size_t buflen) + struct strbuf *buf) { struct probe_trace_arg_ref *ref = arg->ref; - int ret, depth = 0; - char *tmp = buf; + int depth = 0, err; /* Argument name or separator */ if (arg->name) - ret = e_snprintf(buf, buflen, " %s=", arg->name); + err = strbuf_addf(buf, " %s=", arg->name); else - ret = e_snprintf(buf, buflen, " "); - if (ret < 0) - return ret; - buf += ret; - buflen -= ret; + err = strbuf_addch(buf, ' '); + if (err) + return err; /* Special case: @XXX */ if (arg->value[0] == '@' && arg->ref) @@ -1782,59 +1810,44 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg, /* Dereferencing arguments */ if (ref) { - depth = __synthesize_probe_trace_arg_ref(ref, &buf, - &buflen, 1); + depth = __synthesize_probe_trace_arg_ref(ref, buf, 1); if (depth < 0) return depth; } /* Print argument value */ if (arg->value[0] == '@' && arg->ref) - ret = e_snprintf(buf, buflen, "%s%+ld", arg->value, - arg->ref->offset); + err = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset); else - ret = e_snprintf(buf, buflen, "%s", arg->value); - if (ret < 0) - return ret; - buf += ret; - buflen -= ret; + err = strbuf_addstr(buf, arg->value); /* Closing */ - while (depth--) { - ret = e_snprintf(buf, buflen, ")"); - if (ret < 0) - return ret; - buf += ret; - buflen -= ret; - } + while (!err && depth--) + err = strbuf_addch(buf, ')'); + /* Print argument type */ - if (arg->type) { - ret = e_snprintf(buf, buflen, ":%s", arg->type); - if (ret <= 0) - return ret; - buf += ret; - } + if (!err && arg->type) + err = strbuf_addf(buf, ":%s", arg->type); - return buf - tmp; + return err; } char *synthesize_probe_trace_command(struct probe_trace_event *tev) { struct probe_trace_point *tp = &tev->point; - char *buf; - int i, len, ret; + struct strbuf buf; + char *ret = NULL; + int i, err; - buf = zalloc(MAX_CMDLEN); - if (buf == NULL) + /* Uprobes must have tp->module */ + if (tev->uprobes && !tp->module) return NULL; - len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s ", tp->retprobe ? 'r' : 'p', - tev->group, tev->event); - if (len <= 0) - goto error; + if (strbuf_init(&buf, 32) < 0) + return NULL; - /* Uprobes must have tp->module */ - if (tev->uprobes && !tp->module) + if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p', + tev->group, tev->event) < 0) goto error; /* * If tp->address == 0, then this point must be a @@ -1849,34 +1862,25 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) /* Use the tp->address for uprobes */ if (tev->uprobes) - ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", - tp->module, tp->address); + err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address); else if (!strncmp(tp->symbol, "0x", 2)) /* Absolute address. See try_to_find_absolute_address() */ - ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx", - tp->module ?: "", tp->module ? ":" : "", - tp->address); + err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "", + tp->module ? ":" : "", tp->address); else - ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", - tp->module ?: "", tp->module ? ":" : "", - tp->symbol, tp->offset); - - if (ret <= 0) + err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "", + tp->module ? ":" : "", tp->symbol, tp->offset); + if (err) goto error; - len += ret; - for (i = 0; i < tev->nargs; i++) { - ret = synthesize_probe_trace_arg(&tev->args[i], buf + len, - MAX_CMDLEN - len); - if (ret <= 0) + for (i = 0; i < tev->nargs; i++) + if (synthesize_probe_trace_arg(&tev->args[i], &buf) < 0) goto error; - len += ret; - } - return buf; + ret = strbuf_detach(&buf, NULL); error: - free(buf); - return NULL; + strbuf_release(&buf); + return ret; } static int find_perf_probe_point_from_map(struct probe_trace_point *tp, @@ -1958,7 +1962,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, static int convert_to_perf_probe_event(struct probe_trace_event *tev, struct perf_probe_event *pev, bool is_kprobe) { - char buf[64] = ""; + struct strbuf buf = STRBUF_INIT; int i, ret; /* Convert event/group name */ @@ -1981,14 +1985,15 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, if (tev->args[i].name) pev->args[i].name = strdup(tev->args[i].name); else { - ret = synthesize_probe_trace_arg(&tev->args[i], - buf, 64); - pev->args[i].name = strdup(buf); + if ((ret = strbuf_init(&buf, 32)) < 0) + goto error; + ret = synthesize_probe_trace_arg(&tev->args[i], &buf); + pev->args[i].name = strbuf_detach(&buf, NULL); } if (pev->args[i].name == NULL && ret >= 0) ret = -ENOMEM; } - +error: if (ret < 0) clear_perf_probe_event(pev); @@ -2162,35 +2167,38 @@ static int perf_probe_event__sprintf(const char *group, const char *event, struct strbuf *result) { int i, ret; - char buf[128]; - char *place; + char *buf; - /* Synthesize only event probe point */ - place = synthesize_perf_probe_point(&pev->point); - if (!place) - return -EINVAL; + if (asprintf(&buf, "%s:%s", group, event) < 0) + return -errno; + ret = strbuf_addf(result, " %-20s (on ", buf); + free(buf); + if (ret) + return ret; - ret = e_snprintf(buf, 128, "%s:%s", group, event); - if (ret < 0) - goto out; + /* Synthesize only event probe point */ + buf = synthesize_perf_probe_point(&pev->point); + if (!buf) + return -ENOMEM; + ret = strbuf_addstr(result, buf); + free(buf); - strbuf_addf(result, " %-20s (on %s", buf, place); - if (module) - strbuf_addf(result, " in %s", module); + if (!ret && module) + ret = strbuf_addf(result, " in %s", module); - if (pev->nargs > 0) { - strbuf_addstr(result, " with"); - for (i = 0; i < pev->nargs; i++) { - ret = synthesize_perf_probe_arg(&pev->args[i], - buf, 128); - if (ret < 0) - goto out; - strbuf_addf(result, " %s", buf); + if (!ret && pev->nargs > 0) { + ret = strbuf_add(result, " with", 5); + for (i = 0; !ret && i < pev->nargs; i++) { + buf = synthesize_perf_probe_arg(&pev->args[i]); + if (!buf) + return -ENOMEM; + ret = strbuf_addf(result, " %s", buf); + free(buf); } } - strbuf_addch(result, ')'); -out: - free(place); + if (!ret) + ret = strbuf_addch(result, ')'); + return ret; } @@ -2498,7 +2506,8 @@ static int find_probe_functions(struct map *map, char *name, void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused, struct probe_trace_event *tev __maybe_unused, - struct map *map __maybe_unused) { } + struct map *map __maybe_unused, + struct symbol *sym __maybe_unused) { } /* * Find probe function addresses from map. @@ -2516,6 +2525,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, struct probe_trace_point *tp; int num_matched_functions; int ret, i, j, skipped = 0; + char *mod_name; map = get_target_map(pev->target, pev->uprobes); if (!map) { @@ -2600,9 +2610,19 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, tp->realname = strdup_or_goto(sym->name, nomem_out); tp->retprobe = pp->retprobe; - if (pev->target) - tev->point.module = strdup_or_goto(pev->target, - nomem_out); + if (pev->target) { + if (pev->uprobes) { + tev->point.module = strdup_or_goto(pev->target, + nomem_out); + } else { + mod_name = find_module_name(pev->target); + tev->point.module = + strdup(mod_name ? mod_name : pev->target); + free(mod_name); + if (!tev->point.module) + goto nomem_out; + } + } tev->uprobes = pev->uprobes; tev->nargs = pev->nargs; if (tev->nargs) { @@ -2624,7 +2644,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, strdup_or_goto(pev->args[i].type, nomem_out); } - arch__fix_tev_from_maps(pev, tev, map); + arch__fix_tev_from_maps(pev, tev, map, sym); } if (ret == skipped) { ret = -ENOENT; @@ -2743,9 +2763,13 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, { int ret; - if (pev->uprobes && !pev->group) { - /* Replace group name if not given */ - ret = convert_exec_to_group(pev->target, &pev->group); + if (!pev->group) { + /* Set group name if not given */ + if (!pev->uprobes) { + pev->group = strdup(PERFPROBE_GROUP); + ret = pev->group ? 0 : -ENOMEM; + } else + ret = convert_exec_to_group(pev->target, &pev->group); if (ret != 0) { pr_warning("Failed to make a group name.\n"); return ret; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index ba926c30f8cd..5a27eb4fad05 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -114,52 +114,48 @@ int init_probe_symbol_maps(bool user_only); void exit_probe_symbol_maps(void); /* Command string to events */ -extern int parse_perf_probe_command(const char *cmd, - struct perf_probe_event *pev); -extern int parse_probe_trace_command(const char *cmd, - struct probe_trace_event *tev); +int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev); +int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev); /* Events to command string */ -extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); -extern char *synthesize_probe_trace_command(struct probe_trace_event *tev); -extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, - size_t len); +char *synthesize_perf_probe_command(struct perf_probe_event *pev); +char *synthesize_probe_trace_command(struct probe_trace_event *tev); +char *synthesize_perf_probe_arg(struct perf_probe_arg *pa); /* Check the perf_probe_event needs debuginfo */ -extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); +bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); /* Release event contents */ -extern void clear_perf_probe_event(struct perf_probe_event *pev); -extern void clear_probe_trace_event(struct probe_trace_event *tev); +void clear_perf_probe_event(struct perf_probe_event *pev); +void clear_probe_trace_event(struct probe_trace_event *tev); /* Command string to line-range */ -extern int parse_line_range_desc(const char *cmd, struct line_range *lr); +int parse_line_range_desc(const char *cmd, struct line_range *lr); /* Release line range members */ -extern void line_range__clear(struct line_range *lr); +void line_range__clear(struct line_range *lr); /* Initialize line range */ -extern int line_range__init(struct line_range *lr); - -extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int del_perf_probe_events(struct strfilter *filter); - -extern int show_perf_probe_event(const char *group, const char *event, - struct perf_probe_event *pev, - const char *module, bool use_stdout); -extern int show_perf_probe_events(struct strfilter *filter); -extern int show_line_range(struct line_range *lr, const char *module, - bool user); -extern int show_available_vars(struct perf_probe_event *pevs, int npevs, - struct strfilter *filter); -extern int show_available_funcs(const char *module, struct strfilter *filter, - bool user); +int line_range__init(struct line_range *lr); + +int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); +void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int del_perf_probe_events(struct strfilter *filter); + +int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout); +int show_perf_probe_events(struct strfilter *filter); +int show_line_range(struct line_range *lr, const char *module, bool user); +int show_available_vars(struct perf_probe_event *pevs, int npevs, + struct strfilter *filter); +int show_available_funcs(const char *module, struct strfilter *filter, bool user); bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct perf_probe_event *pev, - struct probe_trace_event *tev, struct map *map); + struct probe_trace_event *tev, struct map *map, + struct symbol *sym); /* If there is no space to write, returns -E2BIG. */ int e_snprintf(char *str, size_t size, const char *format, ...) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index e3b3b92e4458..3fe6214970e6 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -220,8 +220,7 @@ int probe_file__add_event(int fd, struct probe_trace_event *tev) pr_debug("Writing event: %s\n", buf); if (!probe_event_dry_run) { - ret = write(fd, buf, strlen(buf)); - if (ret <= 0) { + if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) { ret = -errno; pr_warning("Failed to write event: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 4ce5c5e18f48..1259839dbf6d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -553,7 +553,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) { Dwarf_Die vr_die; - char buf[32], *ptr; + char *buf, *ptr; int ret = 0; /* Copy raw parameters */ @@ -563,13 +563,13 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) if (pf->pvar->name) pf->tvar->name = strdup(pf->pvar->name); else { - ret = synthesize_perf_probe_arg(pf->pvar, buf, 32); - if (ret < 0) - return ret; + buf = synthesize_perf_probe_arg(pf->pvar); + if (!buf) + return -ENOMEM; ptr = strchr(buf, ':'); /* Change type separator to _ */ if (ptr) *ptr = '_'; - pf->tvar->name = strdup(buf); + pf->tvar->name = buf; } if (pf->tvar->name == NULL) return -ENOMEM; @@ -1294,6 +1294,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) { struct available_var_finder *af = data; struct variable_list *vl; + struct strbuf buf = STRBUF_INIT; int tag, ret; vl = &af->vls[af->nvls - 1]; @@ -1307,25 +1308,26 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) if (ret == 0 || ret == -ERANGE) { int ret2; bool externs = !af->child; - struct strbuf buf; - strbuf_init(&buf, 64); + if (strbuf_init(&buf, 64) < 0) + goto error; if (probe_conf.show_location_range) { - if (!externs) { - if (ret) - strbuf_addf(&buf, "[INV]\t"); - else - strbuf_addf(&buf, "[VAL]\t"); - } else - strbuf_addf(&buf, "[EXT]\t"); + if (!externs) + ret2 = strbuf_add(&buf, + ret ? "[INV]\t" : "[VAL]\t", 6); + else + ret2 = strbuf_add(&buf, "[EXT]\t", 6); + if (ret2) + goto error; } ret2 = die_get_varname(die_mem, &buf); if (!ret2 && probe_conf.show_location_range && !externs) { - strbuf_addf(&buf, "\t"); + if (strbuf_addch(&buf, '\t') < 0) + goto error; ret2 = die_get_var_range(&af->pf.sp_die, die_mem, &buf); } @@ -1343,6 +1345,10 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) return DIE_FIND_CB_CONTINUE; else return DIE_FIND_CB_SIBLING; +error: + strbuf_release(&buf); + pr_debug("Error in strbuf\n"); + return DIE_FIND_CB_END; } /* Add a found vars into available variables list */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 0aec7704e395..51137fccb9c8 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -34,27 +34,25 @@ struct debuginfo { }; /* This also tries to open distro debuginfo */ -extern struct debuginfo *debuginfo__new(const char *path); -extern void debuginfo__delete(struct debuginfo *dbg); +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -extern int debuginfo__find_trace_events(struct debuginfo *dbg, - struct perf_probe_event *pev, - struct probe_trace_event **tevs); +int debuginfo__find_trace_events(struct debuginfo *dbg, + struct perf_probe_event *pev, + struct probe_trace_event **tevs); /* Find a perf_probe_point from debuginfo */ -extern int debuginfo__find_probe_point(struct debuginfo *dbg, - unsigned long addr, - struct perf_probe_point *ppt); +int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, + struct perf_probe_point *ppt); /* Find a line range */ -extern int debuginfo__find_line_range(struct debuginfo *dbg, - struct line_range *lr); +int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); /* Find available variables */ -extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, - struct perf_probe_event *pev, - struct variable_list **vls); +int debuginfo__find_available_vars_at(struct debuginfo *dbg, + struct perf_probe_event *pev, + struct variable_list **vls); /* Find a src file from a DWARF tag path */ int get_real_path(const char *raw_path, const char *comp_dir, diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 8162ba0e2e57..36c6862119e3 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -23,3 +23,4 @@ util/strlist.c util/trace-event.c ../lib/rbtree.c util/string.c +util/symbol_fprintf.c diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index 01f03242b86a..c6d4ee2de752 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -17,38 +17,42 @@ static inline int need_bs_quote(char c) return (c == '\'' || c == '!'); } -static void sq_quote_buf(struct strbuf *dst, const char *src) +static int sq_quote_buf(struct strbuf *dst, const char *src) { char *to_free = NULL; + int ret; if (dst->buf == src) to_free = strbuf_detach(dst, NULL); - strbuf_addch(dst, '\''); - while (*src) { + ret = strbuf_addch(dst, '\''); + while (!ret && *src) { size_t len = strcspn(src, "'!"); - strbuf_add(dst, src, len); + ret = strbuf_add(dst, src, len); src += len; - while (need_bs_quote(*src)) { - strbuf_addstr(dst, "'\\"); - strbuf_addch(dst, *src++); - strbuf_addch(dst, '\''); - } + while (!ret && need_bs_quote(*src)) + ret = strbuf_addf(dst, "'\\%c\'", *src++); } - strbuf_addch(dst, '\''); + if (!ret) + ret = strbuf_addch(dst, '\''); free(to_free); + + return ret; } -void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) +int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) { - int i; + int i, ret; /* Copy into destination buffer. */ - strbuf_grow(dst, 255); - for (i = 0; argv[i]; ++i) { - strbuf_addch(dst, ' '); - sq_quote_buf(dst, argv[i]); + ret = strbuf_grow(dst, 255); + for (i = 0; !ret && argv[i]; ++i) { + ret = strbuf_addch(dst, ' '); + if (ret) + break; + ret = sq_quote_buf(dst, argv[i]); if (maxlen && dst->len > maxlen) die("Too many or long arguments"); } + return ret; } diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 172889ea234f..e1ec19146fb0 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -24,6 +24,6 @@ * sq_quote() in a real application. */ -extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); +int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); #endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h new file mode 100644 index 000000000000..abc76e3d3098 --- /dev/null +++ b/tools/perf/util/rb_resort.h @@ -0,0 +1,149 @@ +#ifndef _PERF_RESORT_RB_H_ +#define _PERF_RESORT_RB_H_ +/* + * Template for creating a class to resort an existing rb_tree according to + * a new sort criteria, that must be present in the entries of the source + * rb_tree. + * + * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Quick example, resorting threads by its shortname: + * + * First define the prefix (threads) to be used for the functions and data + * structures created, and provide an expression for the sorting, then the + * fields to be present in each of the entries in the new, sorted, rb_tree. + * + * The body of the init function should collect the fields, maybe + * pre-calculating them from multiple entries in the original 'entry' from + * the rb_tree used as a source for the entries to be sorted: + +DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname, + b->thread->shortname) < 0, + struct thread *thread; +) +{ + entry->thread = rb_entry(nd, struct thread, rb_node); +} + + * After this it is just a matter of instantiating it and iterating it, + * for a few data structures with existing rb_trees, such as 'struct machine', + * helpers are available to get the rb_root and the nr_entries: + + DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr); + + * This will instantiate the new rb_tree and a cursor for it, that can be used as: + + struct rb_node *nd; + + resort_rb__for_each(nd, threads) { + struct thread *t = threads_entry; + printf("%s: %d\n", t->shortname, t->tid); + } + + * Then delete it: + + resort_rb__delete(threads); + + * The name of the data structures and functions will have a _sorted suffix + * right before the method names, i.e. will look like: + * + * struct threads_sorted_entry {} + * threads_sorted__insert() + */ + +#define DEFINE_RESORT_RB(__name, __comp, ...) \ +struct __name##_sorted_entry { \ + struct rb_node rb_node; \ + __VA_ARGS__ \ +}; \ +static void __name##_sorted__init_entry(struct rb_node *nd, \ + struct __name##_sorted_entry *entry); \ + \ +static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \ +{ \ + struct __name##_sorted_entry *a, *b; \ + a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \ + b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \ + return __comp; \ +} \ + \ +struct __name##_sorted { \ + struct rb_root entries; \ + struct __name##_sorted_entry nd[0]; \ +}; \ + \ +static void __name##_sorted__insert(struct __name##_sorted *sorted, \ + struct rb_node *sorted_nd) \ +{ \ + struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \ + while (*p != NULL) { \ + parent = *p; \ + if (__name##_sorted__cmp(sorted_nd, parent)) \ + p = &(*p)->rb_left; \ + else \ + p = &(*p)->rb_right; \ + } \ + rb_link_node(sorted_nd, parent, p); \ + rb_insert_color(sorted_nd, &sorted->entries); \ +} \ + \ +static void __name##_sorted__sort(struct __name##_sorted *sorted, \ + struct rb_root *entries) \ +{ \ + struct rb_node *nd; \ + unsigned int i = 0; \ + for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \ + struct __name##_sorted_entry *snd = &sorted->nd[i++]; \ + __name##_sorted__init_entry(nd, snd); \ + __name##_sorted__insert(sorted, &snd->rb_node); \ + } \ +} \ + \ +static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \ + int nr_entries) \ +{ \ + struct __name##_sorted *sorted; \ + sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \ + if (sorted) { \ + sorted->entries = RB_ROOT; \ + __name##_sorted__sort(sorted, entries); \ + } \ + return sorted; \ +} \ + \ +static void __name##_sorted__delete(struct __name##_sorted *sorted) \ +{ \ + free(sorted); \ +} \ + \ +static void __name##_sorted__init_entry(struct rb_node *nd, \ + struct __name##_sorted_entry *entry) + +#define DECLARE_RESORT_RB(__name) \ +struct __name##_sorted_entry *__name##_entry; \ +struct __name##_sorted *__name = __name##_sorted__new + +#define resort_rb__for_each(__nd, __name) \ + for (__nd = rb_first(&__name->entries); \ + __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \ + rb_node), __nd; \ + __nd = rb_next(__nd)) + +#define resort_rb__delete(__name) \ + __name##_sorted__delete(__name), __name = NULL + +/* + * Helpers for other classes that contains both an rbtree and the + * number of entries in it: + */ + +/* For 'struct intlist' */ +#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ + DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \ + __ilist->rblist.nr_entries) + +/* For 'struct machine->threads' */ +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ + DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) + +#endif /* _PERF_RESORT_RB_H_ */ diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 0467367dc315..481792c7484b 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -129,7 +129,8 @@ bool perf_can_record_cpu_wide(void) return true; } -void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) +void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts, + struct callchain_param *callchain) { struct perf_evsel *evsel; bool use_sample_identifier = false; @@ -148,7 +149,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) use_comm_exec = perf_can_comm_exec(); evlist__for_each(evlist, evsel) { - perf_evsel__config(evsel, opts); + perf_evsel__config(evsel, opts, callchain); if (evsel->tracking && use_comm_exec) evsel->attr.comm_exec = 1; } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b3aabc0d4eb0..62c7f6988e0e 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -31,6 +31,8 @@ #include <perl.h> #include "../../perf.h" +#include "../callchain.h" +#include "../machine.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" @@ -248,10 +250,90 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } +static SV *perl_process_callchain(struct perf_sample *sample, + struct perf_evsel *evsel, + struct addr_location *al) +{ + AV *list; + + list = newAV(); + if (!list) + goto exit; + + if (!symbol_conf.use_callchain || !sample->callchain) + goto exit; + + if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + sample, NULL, NULL, + sysctl_perf_event_max_stack) != 0) { + pr_err("Failed to resolve callchain. Skipping\n"); + goto exit; + } + callchain_cursor_commit(&callchain_cursor); + + + while (1) { + HV *elem; + struct callchain_cursor_node *node; + node = callchain_cursor_current(&callchain_cursor); + if (!node) + break; + + elem = newHV(); + if (!elem) + goto exit; + + if (!hv_stores(elem, "ip", newSVuv(node->ip))) { + hv_undef(elem); + goto exit; + } + + if (node->sym) { + HV *sym = newHV(); + if (!sym) { + hv_undef(elem); + goto exit; + } + if (!hv_stores(sym, "start", newSVuv(node->sym->start)) || + !hv_stores(sym, "end", newSVuv(node->sym->end)) || + !hv_stores(sym, "binding", newSVuv(node->sym->binding)) || + !hv_stores(sym, "name", newSVpvn(node->sym->name, + node->sym->namelen)) || + !hv_stores(elem, "sym", newRV_noinc((SV*)sym))) { + hv_undef(sym); + hv_undef(elem); + goto exit; + } + } + + if (node->map) { + struct map *map = node->map; + const char *dsoname = "[unknown]"; + if (map && map->dso && (map->dso->name || map->dso->long_name)) { + if (symbol_conf.show_kernel_path && map->dso->long_name) + dsoname = map->dso->long_name; + else if (map->dso->name) + dsoname = map->dso->name; + } + if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) { + hv_undef(elem); + goto exit; + } + } + + callchain_cursor_advance(&callchain_cursor); + av_push(list, newRV_noinc((SV*)elem)); + } + +exit: + return newRV_noinc((SV*)list); +} + static void perl_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread) + struct addr_location *al) { + struct thread *thread = al->thread; struct event_format *event = evsel->tp_format; struct format_field *field; static char handler[256]; @@ -295,6 +377,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, XPUSHs(sv_2mortal(newSVuv(ns))); XPUSHs(sv_2mortal(newSViv(pid))); XPUSHs(sv_2mortal(newSVpv(comm, 0))); + XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al))); /* common fields other than pid can be accessed via xsub fns */ @@ -329,6 +412,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, XPUSHs(sv_2mortal(newSVuv(nsecs))); XPUSHs(sv_2mortal(newSViv(pid))); XPUSHs(sv_2mortal(newSVpv(comm, 0))); + XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al))); call_pv("main::trace_unhandled", G_SCALAR); } SPAGAIN; @@ -366,7 +450,7 @@ static void perl_process_event(union perf_event *event, struct perf_evsel *evsel, struct addr_location *al) { - perl_process_tracepoint(sample, evsel, al->thread); + perl_process_tracepoint(sample, evsel, al); perl_process_event_generic(event, sample, evsel); } @@ -490,7 +574,27 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "use Perf::Trace::Util;\n\n"); fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n"); - fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); + fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n"); + + + fprintf(ofp, "\n\ +sub print_backtrace\n\ +{\n\ + my $callchain = shift;\n\ + for my $node (@$callchain)\n\ + {\n\ + if(exists $node->{sym})\n\ + {\n\ + printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\ + }\n\ + else\n\ + {\n\ + printf( \"\\t[\\%%x]\\n\", $node{ip});\n\ + }\n\ + }\n\ +}\n\n\ +"); + while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); @@ -502,7 +606,8 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "$common_secs, "); fprintf(ofp, "$common_nsecs,\n"); fprintf(ofp, "\t $common_pid, "); - fprintf(ofp, "$common_comm,\n\t "); + fprintf(ofp, "$common_comm, "); + fprintf(ofp, "$common_callchain,\n\t "); not_first = 0; count = 0; @@ -519,7 +624,7 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "\tprint_header($event_name, $common_cpu, " "$common_secs, $common_nsecs,\n\t " - "$common_pid, $common_comm);\n\n"); + "$common_pid, $common_comm, $common_callchain);\n\n"); fprintf(ofp, "\tprintf(\""); @@ -581,17 +686,22 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "$%s", f->name); } - fprintf(ofp, ");\n"); + fprintf(ofp, ");\n\n"); + + fprintf(ofp, "\tprint_backtrace($common_callchain);\n"); + fprintf(ofp, "}\n\n"); } fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, " "$common_cpu, $common_secs, $common_nsecs,\n\t " - "$common_pid, $common_comm) = @_;\n\n"); + "$common_pid, $common_comm, $common_callchain) = @_;\n\n"); fprintf(ofp, "\tprint_header($event_name, $common_cpu, " "$common_secs, $common_nsecs,\n\t $common_pid, " - "$common_comm);\n}\n\n"); + "$common_comm, $common_callchain);\n"); + fprintf(ofp, "\tprint_backtrace($common_callchain);\n"); + fprintf(ofp, "}\n\n"); fprintf(ofp, "sub print_header\n{\n" "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index fbd05242b4e5..ff134700bf30 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -41,6 +41,7 @@ #include "../thread-stack.h" #include "../trace-event.h" #include "../machine.h" +#include "../call-path.h" #include "thread_map.h" #include "cpumap.h" #include "stat.h" @@ -323,7 +324,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (!symbol_conf.use_callchain || !sample->callchain) goto exit; - if (thread__resolve_callchain(al->thread, evsel, + if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, sample, NULL, NULL, scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. Skipping\n"); @@ -407,8 +408,11 @@ static void python_process_tracepoint(struct perf_sample *sample, if (!t) Py_FatalError("couldn't create Python tuple"); - if (!event) - die("ug! no event found for type %d", (int)evsel->attr.config); + if (!event) { + snprintf(handler_name, sizeof(handler_name), + "ug! no event found for type %" PRIu64, (u64)evsel->attr.config); + Py_FatalError(handler_name); + } pid = raw_field_value(event, "common_pid", data); @@ -614,7 +618,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, struct machine *machine) { struct tables *tables = container_of(dbe, struct tables, dbe); - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); @@ -681,7 +685,7 @@ static int python_export_sample(struct db_export *dbe, struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(21); + t = tuple_new(22); tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); @@ -704,6 +708,7 @@ static int python_export_sample(struct db_export *dbe, tuple_set_u64(t, 18, es->sample->data_src); tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); + tuple_set_u64(t, 21, es->call_path_id); call_object(tables->sample_handler, t, "sample_table"); @@ -998,8 +1003,10 @@ static void set_table_handlers(struct tables *tables) { const char *perf_db_export_mode = "perf_db_export_mode"; const char *perf_db_export_calls = "perf_db_export_calls"; - PyObject *db_export_mode, *db_export_calls; + const char *perf_db_export_callchains = "perf_db_export_callchains"; + PyObject *db_export_mode, *db_export_calls, *db_export_callchains; bool export_calls = false; + bool export_callchains = false; int ret; memset(tables, 0, sizeof(struct tables)); @@ -1016,6 +1023,7 @@ static void set_table_handlers(struct tables *tables) if (!ret) return; + /* handle export calls */ tables->dbe.crp = NULL; db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls); if (db_export_calls) { @@ -1033,6 +1041,33 @@ static void set_table_handlers(struct tables *tables) Py_FatalError("failed to create calls processor"); } + /* handle export callchains */ + tables->dbe.cpr = NULL; + db_export_callchains = PyDict_GetItemString(main_dict, + perf_db_export_callchains); + if (db_export_callchains) { + ret = PyObject_IsTrue(db_export_callchains); + if (ret == -1) + handler_call_die(perf_db_export_callchains); + export_callchains = !!ret; + } + + if (export_callchains) { + /* + * Attempt to use the call path root from the call return + * processor, if the call return processor is in use. Otherwise, + * we allocate a new call path root. This prevents exporting + * duplicate call path ids when both are in use simultaniously. + */ + if (tables->dbe.crp) + tables->dbe.cpr = tables->dbe.crp->cpr; + else + tables->dbe.cpr = call_path_root__new(); + + if (!tables->dbe.cpr) + Py_FatalError("failed to create call path root"); + } + tables->db_export_mode = true; /* * Reserve per symbol space for symbol->db_id via symbol__priv() diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 60b3593d210d..5214974e841a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -409,6 +409,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->stat = process_stat_stub; if (tool->stat_round == NULL) tool->stat_round = process_stat_round_stub; + if (tool->time_conv == NULL) + tool->time_conv = process_event_op2_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -555,7 +557,7 @@ static u8 revbyte(u8 b) /* * XXX this is hack in attempt to carry flags bitfield - * throught endian village. ABI says: + * through endian village. ABI says: * * Bit-fields are allocated from right to left (least to most significant) * on little-endian implementations and from left to right (most to least @@ -794,6 +796,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_STAT] = perf_event__stat_swap, [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap, [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap, + [PERF_RECORD_TIME_CONV] = perf_event__all64_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; @@ -904,7 +907,7 @@ static void callchain__printf(struct perf_evsel *evsel, unsigned int i; struct ip_callchain *callchain = sample->callchain; - if (has_branch_callstack(evsel)) + if (perf_evsel__has_branch_callstack(evsel)) callchain__lbr_callstack_printf(sample); printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); @@ -1078,7 +1081,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(evsel, sample); - if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel)) + if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel)) branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) @@ -1107,12 +1110,11 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { - const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct machine *machine; if (perf_guest && - ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || - (cpumode == PERF_RECORD_MISC_GUEST_USER))) { + ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || + (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) { u32 pid; if (event->header.type == PERF_RECORD_MMAP @@ -1342,6 +1344,9 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->stat(tool, event, session); case PERF_RECORD_STAT_ROUND: return tool->stat_round(tool, event, session); + case PERF_RECORD_TIME_CONV: + session->time_conv = event->time_conv; + return tool->time_conv(tool, event, session); default: return -EINVAL; } @@ -1831,7 +1836,11 @@ out: out_err: ui_progress__finish(); perf_session__warn_about_errors(session); - ordered_events__free(&session->ordered_events); + /* + * We may switching perf.data output, make ordered_events + * reusable. + */ + ordered_events__reinit(&session->ordered_events); auxtrace__free_events(session); session->one_mmap = false; return err; @@ -1948,105 +1957,6 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, return NULL; } -void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, - struct addr_location *al, - unsigned int print_opts, unsigned int stack_depth) -{ - struct callchain_cursor_node *node; - int print_ip = print_opts & PRINT_IP_OPT_IP; - int print_sym = print_opts & PRINT_IP_OPT_SYM; - int print_dso = print_opts & PRINT_IP_OPT_DSO; - int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET; - int print_oneline = print_opts & PRINT_IP_OPT_ONELINE; - int print_srcline = print_opts & PRINT_IP_OPT_SRCLINE; - char s = print_oneline ? ' ' : '\t'; - - if (symbol_conf.use_callchain && sample->callchain) { - struct addr_location node_al; - - if (thread__resolve_callchain(al->thread, evsel, - sample, NULL, NULL, - stack_depth) != 0) { - if (verbose) - error("Failed to resolve callchain. Skipping\n"); - return; - } - callchain_cursor_commit(&callchain_cursor); - - if (print_symoffset) - node_al = *al; - - while (stack_depth) { - u64 addr = 0; - - node = callchain_cursor_current(&callchain_cursor); - if (!node) - break; - - if (node->sym && node->sym->ignore) - goto next; - - if (print_ip) - printf("%c%16" PRIx64, s, node->ip); - - if (node->map) - addr = node->map->map_ip(node->map, node->ip); - - if (print_sym) { - printf(" "); - if (print_symoffset) { - node_al.addr = addr; - node_al.map = node->map; - symbol__fprintf_symname_offs(node->sym, &node_al, stdout); - } else - symbol__fprintf_symname(node->sym, stdout); - } - - if (print_dso) { - printf(" ("); - map__fprintf_dsoname(node->map, stdout); - printf(")"); - } - - if (print_srcline) - map__fprintf_srcline(node->map, addr, "\n ", - stdout); - - if (!print_oneline) - printf("\n"); - - stack_depth--; -next: - callchain_cursor_advance(&callchain_cursor); - } - - } else { - if (al->sym && al->sym->ignore) - return; - - if (print_ip) - printf("%16" PRIx64, sample->ip); - - if (print_sym) { - printf(" "); - if (print_symoffset) - symbol__fprintf_symname_offs(al->sym, al, - stdout); - else - symbol__fprintf_symname(al->sym, stdout); - } - - if (print_dso) { - printf(" ("); - map__fprintf_dsoname(al->map, stdout); - printf(")"); - } - - if (print_srcline) - map__fprintf_srcline(al->map, al->addr, "\n ", stdout); - } -} - int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5f792e35d4c1..4bd758553450 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -26,6 +26,7 @@ struct perf_session { struct itrace_synth_opts *itrace_synth_opts; struct list_head auxtrace_index; struct trace_event tevent; + struct time_conv_event time_conv; bool repipe; bool one_mmap; void *one_mmap_addr; @@ -35,13 +36,6 @@ struct perf_session { struct perf_tool *tool; }; -#define PRINT_IP_OPT_IP (1<<0) -#define PRINT_IP_OPT_SYM (1<<1) -#define PRINT_IP_OPT_DSO (1<<2) -#define PRINT_IP_OPT_SYMOFFSET (1<<3) -#define PRINT_IP_OPT_ONELINE (1<<4) -#define PRINT_IP_OPT_SRCLINE (1<<5) - struct perf_tool; struct perf_session *perf_session__new(struct perf_data_file *file, @@ -103,10 +97,6 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, - struct addr_location *al, - unsigned int print_opts, unsigned int stack_depth); - int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 93fa136b0025..20e69edd5006 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,13 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__need_collapse = 0; -int sort__has_parent = 0; -int sort__has_sym = 0; -int sort__has_dso = 0; -int sort__has_socket = 0; -int sort__has_thread = 0; -int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; /* @@ -244,7 +237,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) * comparing symbol address alone is not enough since it's a * relative address within a dso. */ - if (!sort__has_dso) { + if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) { ret = sort__dso_cmp(left, right); if (ret != 0) return ret; @@ -2163,7 +2156,7 @@ static int __sort_dimension__add(struct sort_dimension *sd, return -1; if (sd->entry->se_collapse) - sort__need_collapse = 1; + list->need_collapse = 1; sd->taken = 1; @@ -2225,7 +2218,7 @@ int hpp_dimension__add_output(unsigned col) } static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct perf_evlist *evlist __maybe_unused, + struct perf_evlist *evlist, int level) { unsigned int i; @@ -2245,9 +2238,9 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, pr_err("Invalid regex: %s\n%s", parent_pattern, err); return -EINVAL; } - sort__has_parent = 1; + list->parent = 1; } else if (sd->entry == &sort_sym) { - sort__has_sym = 1; + list->sym = 1; /* * perf diff displays the performance difference amongst * two or more perf.data files. Those files could come @@ -2258,13 +2251,13 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, sd->entry->se_collapse = sort__sym_sort; } else if (sd->entry == &sort_dso) { - sort__has_dso = 1; + list->dso = 1; } else if (sd->entry == &sort_socket) { - sort__has_socket = 1; + list->socket = 1; } else if (sd->entry == &sort_thread) { - sort__has_thread = 1; + list->thread = 1; } else if (sd->entry == &sort_comm) { - sort__has_comm = 1; + list->comm = 1; } return __sort_dimension__add(sd, list, level); @@ -2289,7 +2282,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) - sort__has_sym = 1; + list->sym = 1; __sort_dimension__add(sd, list, level); return 0; @@ -2305,7 +2298,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return -EINVAL; if (sd->entry == &sort_mem_daddr_sym) - sort__has_sym = 1; + list->sym = 1; __sort_dimension__add(sd, list, level); return 0; @@ -2445,6 +2438,9 @@ static char *prefix_if_not_in(const char *pre, char *str) static char *setup_overhead(char *keys) { + if (sort__mode == SORT_MODE__DIFF) + return keys; + keys = prefix_if_not_in("overhead", keys); if (symbol_conf.cumulate_callchain) @@ -2746,10 +2742,10 @@ int setup_sorting(struct perf_evlist *evlist) void reset_output_field(void) { - sort__need_collapse = 0; - sort__has_parent = 0; - sort__has_sym = 0; - sort__has_dso = 0; + perf_hpp_list.need_collapse = 0; + perf_hpp_list.parent = 0; + perf_hpp_list.sym = 0; + perf_hpp_list.dso = 0; field_order = NULL; sort_order = NULL; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f4e35998119..42927f448bcb 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,13 +31,6 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; -extern int sort__need_collapse; -extern int sort__has_dso; -extern int sort__has_parent; -extern int sort__has_sym; -extern int sort__has_socket; -extern int sort__has_thread; -extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index b33ffb2af2cf..fdb71961143e 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -152,8 +152,7 @@ static const char *get_ratio_color(enum grc_type type, double ratio) } static void print_stalled_cycles_frontend(int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { double total, ratio = 0.0; @@ -175,8 +174,7 @@ static void print_stalled_cycles_frontend(int cpu, } static void print_stalled_cycles_backend(int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { double total, ratio = 0.0; @@ -194,7 +192,7 @@ static void print_stalled_cycles_backend(int cpu, } static void print_branch_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -213,7 +211,7 @@ static void print_branch_misses(int cpu, } static void print_l1_dcache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -232,7 +230,7 @@ static void print_l1_dcache_misses(int cpu, } static void print_l1_icache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -250,7 +248,7 @@ static void print_l1_icache_misses(int cpu, } static void print_dtlb_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -268,7 +266,7 @@ static void print_dtlb_cache_misses(int cpu, } static void print_itlb_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -286,7 +284,7 @@ static void print_itlb_cache_misses(int cpu, } static void print_ll_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4d9b481cf3b6..ffa1d0653861 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -307,6 +307,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat_evsel *ps = counter->priv; u64 *count = counter->counts->aggr.values; + u64 val; int i, ret; aggr->val = aggr->ena = aggr->run = 0; @@ -346,7 +347,8 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - perf_stat__update_shadow_stats(counter, count, 0); + val = counter->scale * *count; + perf_stat__update_shadow_stats(counter, &val, 0); return 0; } diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index d3d279275432..f95f682aa2b2 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -1,3 +1,4 @@ +#include "debug.h" #include "cache.h" #include <linux/kernel.h> @@ -17,12 +18,13 @@ int prefixcmp(const char *str, const char *prefix) */ char strbuf_slopbuf[1]; -void strbuf_init(struct strbuf *sb, ssize_t hint) +int strbuf_init(struct strbuf *sb, ssize_t hint) { sb->alloc = sb->len = 0; sb->buf = strbuf_slopbuf; if (hint) - strbuf_grow(sb, hint); + return strbuf_grow(sb, hint); + return 0; } void strbuf_release(struct strbuf *sb) @@ -42,60 +44,104 @@ char *strbuf_detach(struct strbuf *sb, size_t *sz) return res; } -void strbuf_grow(struct strbuf *sb, size_t extra) +int strbuf_grow(struct strbuf *sb, size_t extra) { - if (sb->len + extra + 1 <= sb->len) - die("you want to use way too much memory"); - if (!sb->alloc) - sb->buf = NULL; - ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); + char *buf; + size_t nr = sb->len + extra + 1; + + if (nr < sb->alloc) + return 0; + + if (nr <= sb->len) + return -E2BIG; + + if (alloc_nr(sb->alloc) > nr) + nr = alloc_nr(sb->alloc); + + /* + * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is + * a static variable. Thus we have to avoid passing it to realloc. + */ + buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf)); + if (!buf) + return -ENOMEM; + + sb->buf = buf; + sb->alloc = nr; + return 0; } -void strbuf_add(struct strbuf *sb, const void *data, size_t len) +int strbuf_addch(struct strbuf *sb, int c) { - strbuf_grow(sb, len); + int ret = strbuf_grow(sb, 1); + if (ret) + return ret; + + sb->buf[sb->len++] = c; + sb->buf[sb->len] = '\0'; + return 0; +} + +int strbuf_add(struct strbuf *sb, const void *data, size_t len) +{ + int ret = strbuf_grow(sb, len); + if (ret) + return ret; + memcpy(sb->buf + sb->len, data, len); - strbuf_setlen(sb, sb->len + len); + return strbuf_setlen(sb, sb->len + len); } -void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) +static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) { - int len; + int len, ret; va_list ap_saved; - if (!strbuf_avail(sb)) - strbuf_grow(sb, 64); + if (!strbuf_avail(sb)) { + ret = strbuf_grow(sb, 64); + if (ret) + return ret; + } va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); if (len < 0) - die("your vsnprintf is broken"); + return len; if (len > strbuf_avail(sb)) { - strbuf_grow(sb, len); + ret = strbuf_grow(sb, len); + if (ret) + return ret; len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); va_end(ap_saved); if (len > strbuf_avail(sb)) { - die("this should not happen, your vsnprintf is broken"); + pr_debug("this should not happen, your vsnprintf is broken"); + return -EINVAL; } } - strbuf_setlen(sb, sb->len + len); + return strbuf_setlen(sb, sb->len + len); } -void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +int strbuf_addf(struct strbuf *sb, const char *fmt, ...) { va_list ap; + int ret; va_start(ap, fmt); - strbuf_addv(sb, fmt, ap); + ret = strbuf_addv(sb, fmt, ap); va_end(ap); + return ret; } ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) { size_t oldlen = sb->len; size_t oldalloc = sb->alloc; + int ret; + + ret = strbuf_grow(sb, hint ? hint : 8192); + if (ret) + return ret; - strbuf_grow(sb, hint ? hint : 8192); for (;;) { ssize_t cnt; @@ -105,12 +151,14 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) strbuf_release(sb); else strbuf_setlen(sb, oldlen); - return -1; + return cnt; } if (!cnt) break; sb->len += cnt; - strbuf_grow(sb, 8192); + ret = strbuf_grow(sb, 8192); + if (ret) + return ret; } sb->buf[sb->len] = '\0'; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 7a32c838884d..54b409297d4a 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -51,42 +51,42 @@ struct strbuf { #define STRBUF_INIT { 0, 0, strbuf_slopbuf } /*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *buf, ssize_t hint); -extern void strbuf_release(struct strbuf *); -extern char *strbuf_detach(struct strbuf *, size_t *); +int strbuf_init(struct strbuf *buf, ssize_t hint); +void strbuf_release(struct strbuf *buf); +char *strbuf_detach(struct strbuf *buf, size_t *); /*----- strbuf size related -----*/ static inline ssize_t strbuf_avail(const struct strbuf *sb) { return sb->alloc ? sb->alloc - sb->len - 1 : 0; } -extern void strbuf_grow(struct strbuf *, size_t); +int strbuf_grow(struct strbuf *buf, size_t); -static inline void strbuf_setlen(struct strbuf *sb, size_t len) { - if (!sb->alloc) - strbuf_grow(sb, 0); +static inline int strbuf_setlen(struct strbuf *sb, size_t len) { + int ret; + if (!sb->alloc) { + ret = strbuf_grow(sb, 0); + if (ret) + return ret; + } assert(len < sb->alloc); sb->len = len; sb->buf[len] = '\0'; + return 0; } /*----- add data in your buffer -----*/ -static inline void strbuf_addch(struct strbuf *sb, int c) { - strbuf_grow(sb, 1); - sb->buf[sb->len++] = c; - sb->buf[sb->len] = '\0'; -} +int strbuf_addch(struct strbuf *sb, int c); -extern void strbuf_add(struct strbuf *, const void *, size_t); -static inline void strbuf_addstr(struct strbuf *sb, const char *s) { - strbuf_add(sb, s, strlen(s)); +int strbuf_add(struct strbuf *buf, const void *, size_t); +static inline int strbuf_addstr(struct strbuf *sb, const char *s) { + return strbuf_add(sb, s, strlen(s)); } __attribute__((format(printf,2,3))) -extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); -extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap); +int strbuf_addf(struct strbuf *sb, const char *fmt, ...); /* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); +ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); #endif /* __PERF_STRBUF_H */ diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h index 9292a5291445..946fdf2db97c 100644 --- a/tools/perf/util/svghelper.h +++ b/tools/perf/util/svghelper.h @@ -3,32 +3,31 @@ #include <linux/types.h> -extern void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end); -extern void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_box(int Yslot, u64 start, u64 end, const char *type); -extern void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency); - - -extern void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace); -extern void svg_cstate(int cpu, u64 start, u64 end, int type); -extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq); - - -extern void svg_time_grid(double min_thickness); -extern void svg_io_legenda(void); -extern void svg_legenda(void); -extern void svg_wakeline(u64 start, int row1, int row2, const char *backtrace); -extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace); -extern void svg_interrupt(u64 start, int row, const char *backtrace); -extern void svg_text(int Yslot, u64 start, const char *text); -extern void svg_close(void); -extern int svg_build_topology_map(char *sib_core, int sib_core_nr, - char *sib_thr, int sib_thr_nr); +void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end); +void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_box(int Yslot, u64 start, u64 end, const char *type); +void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency); + + +void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace); +void svg_cstate(int cpu, u64 start, u64 end, int type); +void svg_pstate(int cpu, u64 start, u64 end, u64 freq); + + +void svg_time_grid(double min_thickness); +void svg_io_legenda(void); +void svg_legenda(void); +void svg_wakeline(u64 start, int row1, int row2, const char *backtrace); +void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace); +void svg_interrupt(u64 start, int row, const char *backtrace); +void svg_text(int Yslot, u64 start, const char *text); +void svg_close(void); +int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr); extern int svg_page_width; extern u64 svg_highlight; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index b1dd68f358fc..87a297dd8901 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -709,17 +709,10 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, if (ss->opdshdr.sh_type != SHT_PROGBITS) ss->opdsec = NULL; - if (dso->kernel == DSO_TYPE_USER) { - GElf_Shdr shdr; - ss->adjust_symbols = (ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - dso__is_vdso(dso) || - elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL); - } else { + if (dso->kernel == DSO_TYPE_USER) + ss->adjust_symbols = true; + else ss->adjust_symbols = elf__needs_adjust_symbols(ehdr); - } ss->name = strdup(name); if (!ss->name) { @@ -777,7 +770,8 @@ static bool want_demangle(bool is_kernel_sym) return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; } -void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { } +void __weak arch__sym_update(struct symbol *s __maybe_unused, + GElf_Sym *sym __maybe_unused) { } int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, @@ -793,6 +787,7 @@ int dso__load_sym(struct dso *dso, struct map *map, uint32_t idx; GElf_Ehdr ehdr; GElf_Shdr shdr; + GElf_Shdr tshdr; Elf_Data *syms, *opddata = NULL; GElf_Sym sym; Elf_Scn *sec, *sec_strndx; @@ -832,6 +827,9 @@ int dso__load_sym(struct dso *dso, struct map *map, sec = syms_ss->symtab; shdr = syms_ss->symshdr; + if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + dso->text_offset = tshdr.sh_addr - tshdr.sh_offset; + if (runtime_ss->opdsec) opddata = elf_rawdata(runtime_ss->opdsec, NULL); @@ -880,12 +878,8 @@ int dso__load_sym(struct dso *dso, struct map *map, * Handle any relocation of vdso necessary because older kernels * attempted to prelink vdso to its virtual address. */ - if (dso__is_vdso(dso)) { - GElf_Shdr tshdr; - - if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) - map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset; - } + if (dso__is_vdso(dso)) + map->reloc = map->start - dso->text_offset; dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* @@ -954,8 +948,6 @@ int dso__load_sym(struct dso *dso, struct map *map, (sym.st_value & 1)) --sym.st_value; - arch__elf_sym_adjust(&sym); - if (dso->kernel || kmodule) { char dso_name[PATH_MAX]; @@ -1089,6 +1081,8 @@ new_symbol: if (!f) goto out_elf_end; + arch__sym_update(f, &sym); + if (filter && filter(curr_map, f)) symbol__delete(f); else { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e7588dc91518..7fb33304fb4e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -255,40 +255,6 @@ void symbol__delete(struct symbol *sym) free(((void *)sym) - symbol_conf.priv_size); } -size_t symbol__fprintf(struct symbol *sym, FILE *fp) -{ - return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n", - sym->start, sym->end, - sym->binding == STB_GLOBAL ? 'g' : - sym->binding == STB_LOCAL ? 'l' : 'w', - sym->name); -} - -size_t symbol__fprintf_symname_offs(const struct symbol *sym, - const struct addr_location *al, FILE *fp) -{ - unsigned long offset; - size_t length; - - if (sym && sym->name) { - length = fprintf(fp, "%s", sym->name); - if (al) { - if (al->addr < sym->end) - offset = al->addr - sym->start; - else - offset = al->addr - al->map->start - sym->start; - length += fprintf(fp, "+0x%lx", offset); - } - return length; - } else - return fprintf(fp, "[unknown]"); -} - -size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) -{ - return symbol__fprintf_symname_offs(sym, NULL, fp); -} - void symbols__delete(struct rb_root *symbols) { struct symbol *pos; @@ -335,7 +301,7 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) if (ip < s->start) n = n->rb_left; - else if (ip >= s->end) + else if (ip > s->end || (ip == s->end && ip != s->start)) n = n->rb_right; else return s; @@ -364,11 +330,6 @@ static struct symbol *symbols__next(struct symbol *sym) return NULL; } -struct symbol_name_rb_node { - struct rb_node rb_node; - struct symbol sym; -}; - static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) { struct rb_node **p = &symbols->rb_node; @@ -452,6 +413,18 @@ void dso__reset_find_symbol_cache(struct dso *dso) } } +void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym) +{ + symbols__insert(&dso->symbols[type], sym); + + /* update the symbol cache if necessary */ + if (dso->last_find_result[type].addr >= sym->start && + (dso->last_find_result[type].addr < sym->end || + sym->start == sym->end)) { + dso->last_find_result[type].symbol = sym; + } +} + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { @@ -497,21 +470,6 @@ void dso__sort_by_name(struct dso *dso, enum map_type type) &dso->symbols[type]); } -size_t dso__fprintf_symbols_by_name(struct dso *dso, - enum map_type type, FILE *fp) -{ - size_t ret = 0; - struct rb_node *nd; - struct symbol_name_rb_node *pos; - - for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); - fprintf(fp, "%s\n", pos->sym.name); - } - - return ret; -} - int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, u64 start)) @@ -1262,8 +1220,8 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) return 0; } -int dso__load_kallsyms(struct dso *dso, const char *filename, - struct map *map, symbol_filter_t filter) +int __dso__load_kallsyms(struct dso *dso, const char *filename, + struct map *map, bool no_kcore, symbol_filter_t filter) { u64 delta = 0; @@ -1284,12 +1242,18 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, else dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; - if (!dso__load_kcore(dso, map, filename)) + if (!no_kcore && !dso__load_kcore(dso, map, filename)) return dso__split_kallsyms_for_kcore(dso, map, filter); else return dso__split_kallsyms(dso, map, delta, filter); } +int dso__load_kallsyms(struct dso *dso, const char *filename, + struct map *map, symbol_filter_t filter) +{ + return __dso__load_kallsyms(dso, filename, map, false, filter); +} + static int dso__load_perf_map(struct dso *dso, struct map *map, symbol_filter_t filter) { @@ -1644,25 +1608,27 @@ out: return err; } +static bool visible_dir_filter(const char *name, struct dirent *d) +{ + if (d->d_type != DT_DIR) + return false; + return lsdir_no_dot_filter(name, d); +} + static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) { char kallsyms_filename[PATH_MAX]; - struct dirent *dent; int ret = -1; - DIR *d; + struct strlist *dirs; + struct str_node *nd; - d = opendir(dir); - if (!d) + dirs = lsdir(dir, visible_dir_filter); + if (!dirs) return -1; - while (1) { - dent = readdir(d); - if (!dent) - break; - if (dent->d_type != DT_DIR) - continue; + strlist__for_each(nd, dirs) { scnprintf(kallsyms_filename, sizeof(kallsyms_filename), - "%s/%s/kallsyms", dir, dent->d_name); + "%s/%s/kallsyms", dir, nd->s); if (!validate_kcore_addresses(kallsyms_filename, map)) { strlcpy(dir, kallsyms_filename, dir_sz); ret = 0; @@ -1670,7 +1636,7 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) } } - closedir(d); + strlist__delete(dirs); return ret; } @@ -1678,7 +1644,7 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) static char *dso__find_kallsyms(struct dso *dso, struct map *map) { u8 host_build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a937053a0ae0..2b5e4ed76fcb 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -34,8 +34,8 @@ #endif #ifdef HAVE_LIBELF_SUPPORT -extern Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, - GElf_Shdr *shp, const char *name, size_t *idx); +Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name, size_t *idx); #endif #ifndef DMGL_PARAMS @@ -55,6 +55,7 @@ struct symbol { u16 namelen; u8 binding; bool ignore; + u8 arch_sym; char name[0]; }; @@ -140,6 +141,11 @@ struct symbol_conf { extern struct symbol_conf symbol_conf; +struct symbol_name_rb_node { + struct rb_node rb_node; + struct symbol sym; +}; + static inline int __symbol__join_symfs(char *bf, size_t size, const char *path) { return path__join(bf, size, symbol_conf.symfs, path); @@ -235,9 +241,14 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, symbol_filter_t filter); int dso__load_vmlinux_path(struct dso *dso, struct map *map, symbol_filter_t filter); +int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, + bool no_kcore, symbol_filter_t filter); int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter); +void dso__insert_symbol(struct dso *dso, enum map_type type, + struct symbol *sym); + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, @@ -262,8 +273,14 @@ int symbol__init(struct perf_env *env); void symbol__exit(void); void symbol__elf_init(void); struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); +size_t __symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); +size_t __symbol__fprintf_symname(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); size_t symbol__fprintf(struct symbol *sym, FILE *fp); bool symbol_type__is_a(char symbol_type, enum map_type map_type); @@ -310,7 +327,7 @@ int setup_intlist(struct intlist **list, const char *list_str, #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); -void arch__elf_sym_adjust(GElf_Sym *sym); +void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif #define SYMBOL_A 0 diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c new file mode 100644 index 000000000000..a680bdaa65dc --- /dev/null +++ b/tools/perf/util/symbol_fprintf.c @@ -0,0 +1,71 @@ +#include <elf.h> +#include <inttypes.h> +#include <stdio.h> + +#include "symbol.h" + +size_t symbol__fprintf(struct symbol *sym, FILE *fp) +{ + return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n", + sym->start, sym->end, + sym->binding == STB_GLOBAL ? 'g' : + sym->binding == STB_LOCAL ? 'l' : 'w', + sym->name); +} + +size_t __symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp) +{ + unsigned long offset; + size_t length; + + if (sym && sym->name) { + length = fprintf(fp, "%s", sym->name); + if (al) { + if (al->addr < sym->end) + offset = al->addr - sym->start; + else + offset = al->addr - al->map->start - sym->start; + length += fprintf(fp, "+0x%lx", offset); + } + return length; + } else if (al && unknown_as_addr) + return fprintf(fp, "[%#" PRIx64 "]", al->addr); + else + return fprintf(fp, "[unknown]"); +} + +size_t symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, + FILE *fp) +{ + return __symbol__fprintf_symname_offs(sym, al, false, fp); +} + +size_t __symbol__fprintf_symname(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp) +{ + return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp); +} + +size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) +{ + return __symbol__fprintf_symname_offs(sym, NULL, false, fp); +} + +size_t dso__fprintf_symbols_by_name(struct dso *dso, + enum map_type type, FILE *fp) +{ + size_t ret = 0; + struct rb_node *nd; + struct symbol_name_rb_node *pos; + + for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); + fprintf(fp, "%s\n", pos->sym.name); + } + + return ret; +} diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c new file mode 100644 index 000000000000..bbb4c1957578 --- /dev/null +++ b/tools/perf/util/syscalltbl.c @@ -0,0 +1,134 @@ +/* + * System call table mapper + * + * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include "syscalltbl.h" +#include <stdlib.h> + +#ifdef HAVE_SYSCALL_TABLE +#include <linux/compiler.h> +#include <string.h> +#include "util.h" + +#if defined(__x86_64__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_x86_64; +#endif + +struct syscall { + int id; + const char *name; +}; + +static int syscallcmpname(const void *vkey, const void *ventry) +{ + const char *key = vkey; + const struct syscall *entry = ventry; + + return strcmp(key, entry->name); +} + +static int syscallcmp(const void *va, const void *vb) +{ + const struct syscall *a = va, *b = vb; + + return strcmp(a->name, b->name); +} + +static int syscalltbl__init_native(struct syscalltbl *tbl) +{ + int nr_entries = 0, i, j; + struct syscall *entries; + + for (i = 0; i <= syscalltbl_native_max_id; ++i) + if (syscalltbl_native[i]) + ++nr_entries; + + entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries); + if (tbl->syscalls.entries == NULL) + return -1; + + for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) { + if (syscalltbl_native[i]) { + entries[j].name = syscalltbl_native[i]; + entries[j].id = i; + ++j; + } + } + + qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp); + tbl->syscalls.nr_entries = nr_entries; + return 0; +} + +struct syscalltbl *syscalltbl__new(void) +{ + struct syscalltbl *tbl = malloc(sizeof(*tbl)); + if (tbl) { + if (syscalltbl__init_native(tbl)) { + free(tbl); + return NULL; + } + } + return tbl; +} + +void syscalltbl__delete(struct syscalltbl *tbl) +{ + zfree(&tbl->syscalls.entries); + free(tbl); +} + +const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id) +{ + return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL; +} + +int syscalltbl__id(struct syscalltbl *tbl, const char *name) +{ + struct syscall *sc = bsearch(name, tbl->syscalls.entries, + tbl->syscalls.nr_entries, sizeof(*sc), + syscallcmpname); + + return sc ? sc->id : -1; +} + +#else /* HAVE_SYSCALL_TABLE */ + +#include <libaudit.h> + +struct syscalltbl *syscalltbl__new(void) +{ + struct syscalltbl *tbl = malloc(sizeof(*tbl)); + if (tbl) + tbl->audit_machine = audit_detect_machine(); + return tbl; +} + +void syscalltbl__delete(struct syscalltbl *tbl) +{ + free(tbl); +} + +const char *syscalltbl__name(const struct syscalltbl *tbl, int id) +{ + return audit_syscall_to_name(id, tbl->audit_machine); +} + +int syscalltbl__id(struct syscalltbl *tbl, const char *name) +{ + return audit_name_to_syscall(name, tbl->audit_machine); +} +#endif /* HAVE_SYSCALL_TABLE */ diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h new file mode 100644 index 000000000000..e2951510484f --- /dev/null +++ b/tools/perf/util/syscalltbl.h @@ -0,0 +1,20 @@ +#ifndef __PERF_SYSCALLTBL_H +#define __PERF_SYSCALLTBL_H + +struct syscalltbl { + union { + int audit_machine; + struct { + int nr_entries; + void *entries; + } syscalls; + }; +}; + +struct syscalltbl *syscalltbl__new(void); +void syscalltbl__delete(struct syscalltbl *tbl); + +const char *syscalltbl__name(const struct syscalltbl *tbl, int id); +int syscalltbl__id(struct syscalltbl *tbl, const char *name); + +#endif /* __PERF_SYSCALLTBL_H */ diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 679688e70ae7..825086aa9a08 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -22,44 +22,9 @@ #include "debug.h" #include "symbol.h" #include "comm.h" +#include "call-path.h" #include "thread-stack.h" -#define CALL_PATH_BLOCK_SHIFT 8 -#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT) -#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1) - -struct call_path_block { - struct call_path cp[CALL_PATH_BLOCK_SIZE]; - struct list_head node; -}; - -/** - * struct call_path_root - root of all call paths. - * @call_path: root call path - * @blocks: list of blocks to store call paths - * @next: next free space - * @sz: number of spaces - */ -struct call_path_root { - struct call_path call_path; - struct list_head blocks; - size_t next; - size_t sz; -}; - -/** - * struct call_return_processor - provides a call-back to consume call-return - * information. - * @cpr: call path root - * @process: call-back that accepts call/return information - * @data: anonymous data for call-back - */ -struct call_return_processor { - struct call_path_root *cpr; - int (*process)(struct call_return *cr, void *data); - void *data; -}; - #define STACK_GROWTH 2048 /** @@ -335,108 +300,6 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; } -static void call_path__init(struct call_path *cp, struct call_path *parent, - struct symbol *sym, u64 ip, bool in_kernel) -{ - cp->parent = parent; - cp->sym = sym; - cp->ip = sym ? 0 : ip; - cp->db_id = 0; - cp->in_kernel = in_kernel; - RB_CLEAR_NODE(&cp->rb_node); - cp->children = RB_ROOT; -} - -static struct call_path_root *call_path_root__new(void) -{ - struct call_path_root *cpr; - - cpr = zalloc(sizeof(struct call_path_root)); - if (!cpr) - return NULL; - call_path__init(&cpr->call_path, NULL, NULL, 0, false); - INIT_LIST_HEAD(&cpr->blocks); - return cpr; -} - -static void call_path_root__free(struct call_path_root *cpr) -{ - struct call_path_block *pos, *n; - - list_for_each_entry_safe(pos, n, &cpr->blocks, node) { - list_del(&pos->node); - free(pos); - } - free(cpr); -} - -static struct call_path *call_path__new(struct call_path_root *cpr, - struct call_path *parent, - struct symbol *sym, u64 ip, - bool in_kernel) -{ - struct call_path_block *cpb; - struct call_path *cp; - size_t n; - - if (cpr->next < cpr->sz) { - cpb = list_last_entry(&cpr->blocks, struct call_path_block, - node); - } else { - cpb = zalloc(sizeof(struct call_path_block)); - if (!cpb) - return NULL; - list_add_tail(&cpb->node, &cpr->blocks); - cpr->sz += CALL_PATH_BLOCK_SIZE; - } - - n = cpr->next++ & CALL_PATH_BLOCK_MASK; - cp = &cpb->cp[n]; - - call_path__init(cp, parent, sym, ip, in_kernel); - - return cp; -} - -static struct call_path *call_path__findnew(struct call_path_root *cpr, - struct call_path *parent, - struct symbol *sym, u64 ip, u64 ks) -{ - struct rb_node **p; - struct rb_node *node_parent = NULL; - struct call_path *cp; - bool in_kernel = ip >= ks; - - if (sym) - ip = 0; - - if (!parent) - return call_path__new(cpr, parent, sym, ip, in_kernel); - - p = &parent->children.rb_node; - while (*p != NULL) { - node_parent = *p; - cp = rb_entry(node_parent, struct call_path, rb_node); - - if (cp->sym == sym && cp->ip == ip) - return cp; - - if (sym < cp->sym || (sym == cp->sym && ip < cp->ip)) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - cp = call_path__new(cpr, parent, sym, ip, in_kernel); - if (!cp) - return NULL; - - rb_link_node(&cp->rb_node, node_parent, p); - rb_insert_color(&cp->rb_node, &parent->children); - - return cp; -} - struct call_return_processor * call_return_processor__new(int (*process)(struct call_return *cr, void *data), void *data) diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index e1528f1374c3..ad44c7944b8e 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -19,17 +19,16 @@ #include <sys/types.h> #include <linux/types.h> -#include <linux/rbtree.h> struct thread; struct comm; struct ip_callchain; struct symbol; struct dso; -struct call_return_processor; struct comm; struct perf_sample; struct addr_location; +struct call_path; /* * Call/Return flags. @@ -69,26 +68,16 @@ struct call_return { }; /** - * struct call_path - node in list of calls leading to a function call. - * @parent: call path to the parent function call - * @sym: symbol of function called - * @ip: only if sym is null, the ip of the function - * @db_id: id used for db-export - * @in_kernel: whether function is a in the kernel - * @rb_node: node in parent's tree of called functions - * @children: tree of call paths of functions called - * - * In combination with the call_return structure, the call_path structure - * defines a context-sensitve call-graph. + * struct call_return_processor - provides a call-back to consume call-return + * information. + * @cpr: call path root + * @process: call-back that accepts call/return information + * @data: anonymous data for call-back */ -struct call_path { - struct call_path *parent; - struct symbol *sym; - u64 ip; - u64 db_id; - bool in_kernel; - struct rb_node rb_node; - struct rb_root children; +struct call_return_processor { + struct call_path_root *cpr; + int (*process)(struct call_return *cr, void *data); + void *data; }; int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index dfd00c6dad6e..45fcb715a36b 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -10,6 +10,8 @@ #include "comm.h" #include "unwind.h" +#include <api/fs/fs.h> + int thread__init_map_groups(struct thread *thread, struct machine *machine) { struct thread *leader; @@ -153,6 +155,23 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, return 0; } +int thread__set_comm_from_proc(struct thread *thread) +{ + char path[64]; + char *comm = NULL; + size_t sz; + int err = -1; + + if (!(snprintf(path, sizeof(path), "%d/task/%d/comm", + thread->pid_, thread->tid) >= (int)sizeof(path)) && + procfs__read_str(path, &comm, &sz) == 0) { + comm[sz - 1] = '\0'; + err = thread__set_comm(thread, comm, 0); + } + + return err; +} + const char *thread__comm_str(const struct thread *thread) { const struct comm *comm = thread__comm(thread); @@ -233,7 +252,7 @@ void thread__find_cpumode_addr_location(struct thread *thread, struct addr_location *al) { size_t i; - const u8 const cpumodes[] = { + const u8 cpumodes[] = { PERF_RECORD_MISC_USER, PERF_RECORD_MISC_KERNEL, PERF_RECORD_MISC_GUEST_USER, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index a0ac0317affb..45fba13c800b 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -9,6 +9,9 @@ #include "symbol.h" #include <strlist.h> #include <intlist.h> +#ifdef HAVE_LIBUNWIND_SUPPORT +#include <libunwind.h> +#endif struct thread_stack; @@ -32,6 +35,9 @@ struct thread { void *priv; struct thread_stack *ts; +#ifdef HAVE_LIBUNWIND_SUPPORT + unw_addr_space_t addr_space; +#endif }; struct machine; @@ -65,6 +71,8 @@ static inline int thread__set_comm(struct thread *thread, const char *comm, return __thread__set_comm(thread, comm, timestamp, false); } +int thread__set_comm_from_proc(struct thread *thread); + int thread__comm_len(struct thread *thread); struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 08afc6909953..5654fe15e036 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -94,7 +94,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) DIR *proc; int max_threads = 32, items, i; char path[256]; - struct dirent dirent, *next, **namelist = NULL; + struct dirent *dirent, **namelist = NULL; struct thread_map *threads = thread_map__alloc(max_threads); if (threads == NULL) @@ -107,16 +107,16 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) threads->nr = 0; atomic_set(&threads->refcnt, 1); - while (!readdir_r(proc, &dirent, &next) && next) { + while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; struct stat st; - pid_t pid = strtol(dirent.d_name, &end, 10); + pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; - snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); + snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); if (stat(path, &st) != 0) continue; @@ -260,7 +260,7 @@ struct thread_map *thread_map__new_dummy(void) return threads; } -static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) +struct thread_map *thread_map__new_by_tid_str(const char *tid_str) { struct thread_map *threads = NULL, *nt; int ntasks = 0; @@ -436,3 +436,15 @@ struct thread_map *thread_map__new_event(struct thread_map_event *event) return threads; } + +bool thread_map__has(struct thread_map *threads, pid_t pid) +{ + int i; + + for (i = 0; i < threads->nr; ++i) { + if (threads->map[i].pid == pid) + return true; + } + + return false; +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 85e4c7c4fbde..bd3b971588da 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,6 +31,8 @@ void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, const char *tid, uid_t uid); +struct thread_map *thread_map__new_by_tid_str(const char *tid_str); + size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); static inline int thread_map__nr(struct thread_map *threads) @@ -55,4 +57,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread) } void thread_map__read_comms(struct thread_map *threads); +bool thread_map__has(struct thread_map *threads, pid_t pid); #endif /* __PERF_THREAD_MAP_H */ diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 55de4cffcd4e..ac2590a3de2d 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -57,6 +57,7 @@ struct perf_tool { id_index, auxtrace_info, auxtrace_error, + time_conv, thread_map, cpu_map, stat_config, diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h new file mode 100644 index 000000000000..e97d7016d771 --- /dev/null +++ b/tools/perf/util/trigger.h @@ -0,0 +1,94 @@ +#ifndef __TRIGGER_H_ +#define __TRIGGER_H_ 1 + +#include "util/debug.h" +#include "asm/bug.h" + +/* + * Use trigger to model operations which need to be executed when + * an event (a signal, for example) is observed. + * + * States and transits: + * + * + * OFF--(on)--> READY --(hit)--> HIT + * ^ | + * | (ready) + * | | + * \_____________/ + * + * is_hit and is_ready are two key functions to query the state of + * a trigger. is_hit means the event already happen; is_ready means the + * trigger is waiting for the event. + */ + +struct trigger { + volatile enum { + TRIGGER_ERROR = -2, + TRIGGER_OFF = -1, + TRIGGER_READY = 0, + TRIGGER_HIT = 1, + } state; + const char *name; +}; + +#define TRIGGER_WARN_ONCE(t, exp) \ + WARN_ONCE(t->state != exp, "trigger '%s' state transist error: %d in %s()\n", \ + t->name, t->state, __func__) + +static inline bool trigger_is_available(struct trigger *t) +{ + return t->state >= 0; +} + +static inline bool trigger_is_error(struct trigger *t) +{ + return t->state <= TRIGGER_ERROR; +} + +static inline void trigger_on(struct trigger *t) +{ + TRIGGER_WARN_ONCE(t, TRIGGER_OFF); + t->state = TRIGGER_READY; +} + +static inline void trigger_ready(struct trigger *t) +{ + if (!trigger_is_available(t)) + return; + t->state = TRIGGER_READY; +} + +static inline void trigger_hit(struct trigger *t) +{ + if (!trigger_is_available(t)) + return; + TRIGGER_WARN_ONCE(t, TRIGGER_READY); + t->state = TRIGGER_HIT; +} + +static inline void trigger_off(struct trigger *t) +{ + if (!trigger_is_available(t)) + return; + t->state = TRIGGER_OFF; +} + +static inline void trigger_error(struct trigger *t) +{ + t->state = TRIGGER_ERROR; +} + +static inline bool trigger_is_ready(struct trigger *t) +{ + return t->state == TRIGGER_READY; +} + +static inline bool trigger_is_hit(struct trigger *t) +{ + return t->state == TRIGGER_HIT; +} + +#define DEFINE_TRIGGER(n) \ +struct trigger n = {.state = TRIGGER_OFF, .name = #n} +#endif diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index a8b78f1b3243..d5b11e2b85e0 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -3,10 +3,29 @@ #include <linux/types.h> -#include "../arch/x86/util/tsc.h" +#include "event.h" + +struct perf_tsc_conversion { + u16 time_shift; + u32 time_mult; + u64 time_zero; +}; +struct perf_event_mmap_page; + +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc); u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); +struct perf_event_mmap_page; +struct perf_tool; +struct machine; + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine); + #endif diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index ee7e372297e5..63687d3a344e 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -32,6 +32,7 @@ #include "symbol.h" #include "util.h" #include "debug.h" +#include "asm/bug.h" extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, @@ -580,43 +581,33 @@ static unw_accessors_t accessors = { int unwind__prepare_access(struct thread *thread) { - unw_addr_space_t addr_space; - if (callchain_param.record_mode != CALLCHAIN_DWARF) return 0; - addr_space = unw_create_addr_space(&accessors, 0); - if (!addr_space) { + thread->addr_space = unw_create_addr_space(&accessors, 0); + if (!thread->addr_space) { pr_err("unwind: Can't create unwind address space.\n"); return -ENOMEM; } - unw_set_caching_policy(addr_space, UNW_CACHE_GLOBAL); - thread__set_priv(thread, addr_space); - + unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL); return 0; } void unwind__flush_access(struct thread *thread) { - unw_addr_space_t addr_space; - if (callchain_param.record_mode != CALLCHAIN_DWARF) return; - addr_space = thread__priv(thread); - unw_flush_cache(addr_space, 0, 0); + unw_flush_cache(thread->addr_space, 0, 0); } void unwind__finish_access(struct thread *thread) { - unw_addr_space_t addr_space; - if (callchain_param.record_mode != CALLCHAIN_DWARF) return; - addr_space = thread__priv(thread); - unw_destroy_addr_space(addr_space); + unw_destroy_addr_space(thread->addr_space); } static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, @@ -639,7 +630,9 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, * unwind itself. */ if (max_stack - 1 > 0) { - addr_space = thread__priv(ui->thread); + WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL"); + addr_space = ui->thread->addr_space; + if (addr_space == NULL) return -1; diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 6adfa18cdd4e..996046a66fe5 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -41,15 +41,9 @@ static void warn_builtin(const char *warn, va_list params) /* If we are in a dlopen()ed .so write to a global variable would segfault * (ugh), so keep things static. */ static void (*usage_routine)(const char *err) NORETURN = usage_builtin; -static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; static void (*error_routine)(const char *err, va_list params) = error_builtin; static void (*warn_routine)(const char *err, va_list params) = warn_builtin; -void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) -{ - die_routine = routine; -} - void set_warning_routine(void (*routine)(const char *err, va_list params)) { warn_routine = routine; @@ -65,7 +59,7 @@ void die(const char *err, ...) va_list params; va_start(params, err); - die_routine(err, params); + die_builtin(err, params); va_end(params); } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b7766c577b01..eab077ad6ca9 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -33,6 +33,8 @@ struct callchain_param callchain_param = { unsigned int page_size; int cacheline_size; +unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; + bool test_attr__enabled; bool perf_host = true; @@ -117,6 +119,40 @@ int rm_rf(char *path) return rmdir(path); } +/* A filter which removes dot files */ +bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d) +{ + return d->d_name[0] != '.'; +} + +/* lsdir reads a directory and store it in strlist */ +struct strlist *lsdir(const char *name, + bool (*filter)(const char *, struct dirent *)) +{ + struct strlist *list = NULL; + DIR *dir; + struct dirent *d; + + dir = opendir(name); + if (!dir) + return NULL; + + list = strlist__new(NULL, NULL); + if (!list) { + errno = ENOMEM; + goto out; + } + + while ((d = readdir(dir)) != NULL) { + if (!filter || filter(name, d)) + strlist__add(list, d->d_name); + } + +out: + closedir(dir); + return list; +} + static int slow_copyfile(const char *from, const char *to) { int err = -1; @@ -471,7 +507,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) "needed for --call-graph fp\n"); break; -#ifdef HAVE_DWARF_UNWIND_SUPPORT /* Dwarf style */ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { const unsigned long default_stack_dump_size = 8192; @@ -487,7 +522,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) ret = get_stack_size(tok, &size); param->dump_size = size; } -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ } else if (!strncmp(name, "lbr", sizeof("lbr"))) { if (!strtok_r(NULL, ",", &saveptr)) { param->record_mode = CALLCHAIN_LBR; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index d0d50cef8b2a..7651633a8dc7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -79,6 +79,7 @@ #include <termios.h> #include <linux/bitops.h> #include <termios.h> +#include "strlist.h" extern const char *graph_line; extern const char *graph_dotted_line; @@ -133,25 +134,15 @@ extern char buildid_dir[]; #define PERF_GTK_DSO "libperf-gtk.so" /* General helper functions */ -extern void usage(const char *err) NORETURN; -extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +void usage(const char *err) NORETURN; +void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); +int error(const char *err, ...) __attribute__((format (printf, 1, 2))); +void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); -#include "../../../include/linux/stringify.h" +void set_warning_routine(void (*routine)(const char *err, va_list params)); -#define DIE_IF(cnd) \ - do { if (cnd) \ - die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \ - __stringify(cnd) "\n"); \ - } while (0) - - -extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); -extern void set_warning_routine(void (*routine)(const char *err, va_list params)); - -extern int prefixcmp(const char *str, const char *prefix); -extern void set_buildid_dir(const char *dir); +int prefixcmp(const char *str, const char *prefix); +void set_buildid_dir(const char *dir); #ifdef __GLIBC_PREREQ #if __GLIBC_PREREQ(2, 1) @@ -169,13 +160,6 @@ static inline char *gitstrchrnul(const char *s, int c) } #endif -/* - * Wrappers: - */ -extern char *xstrdup(const char *str); -extern void *xrealloc(void *ptr, size_t size) __attribute__((weak)); - - static inline void *zalloc(size_t size) { return calloc(1, size); @@ -233,6 +217,8 @@ static inline int sane_case(int x, int high) int mkdir_p(char *path, mode_t mode); int rm_rf(char *path); +struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); +bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); @@ -265,11 +251,17 @@ int hex2u64(const char *ptr, u64 *val); char *ltrim(char *s); char *rtrim(char *s); +static inline char *trim(char *s) +{ + return ltrim(rtrim(s)); +} + void dump_stack(void); void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; +extern unsigned int sysctl_perf_event_max_stack; struct parse_tag { char tag; diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c deleted file mode 100644 index 19f15b650703..000000000000 --- a/tools/perf/util/wrapper.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Various trivial helper wrappers around standard functions - */ -#include "cache.h" - -/* - * There's no pack memory to release - but stay close to the Git - * version so wrap this away: - */ -static inline void release_pack_memory(size_t size __maybe_unused, - int flag __maybe_unused) -{ -} - -char *xstrdup(const char *str) -{ - char *ret = strdup(str); - if (!ret) { - release_pack_memory(strlen(str) + 1, -1); - ret = strdup(str); - if (!ret) - die("Out of memory, strdup failed"); - } - return ret; -} - -void *xrealloc(void *ptr, size_t size) -{ - void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - release_pack_memory(size, -1); - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } - return ret; -} diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index d0e6b857d8d1..546cf4a503b7 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -91,7 +91,7 @@ osl_get_customized_table(char *pathname, char *signature, u32 instance, struct acpi_table_header **table, - acpi_physical_address * address); + acpi_physical_address *address); static acpi_status osl_list_bios_tables(void); @@ -99,7 +99,7 @@ static acpi_status osl_get_bios_table(char *signature, u32 instance, struct acpi_table_header **table, - acpi_physical_address * address); + acpi_physical_address *address); static acpi_status osl_get_last_status(acpi_status default_status); @@ -187,7 +187,7 @@ static acpi_status osl_get_last_status(acpi_status default_status) acpi_status acpi_os_get_table_by_address(acpi_physical_address address, - struct acpi_table_header ** table) + struct acpi_table_header **table) { u32 table_length; struct acpi_table_header *mapped_table; @@ -252,8 +252,8 @@ exit: acpi_status acpi_os_get_table_by_name(char *signature, u32 instance, - struct acpi_table_header ** table, - acpi_physical_address * address) + struct acpi_table_header **table, + acpi_physical_address *address) { acpi_status status; @@ -380,8 +380,8 @@ static acpi_status osl_add_table_to_list(char *signature, u32 instance) acpi_status acpi_os_get_table_by_index(u32 index, - struct acpi_table_header ** table, - u32 *instance, acpi_physical_address * address) + struct acpi_table_header **table, + u32 *instance, acpi_physical_address *address) { struct osl_table_info *info; acpi_status status; @@ -447,7 +447,7 @@ osl_find_rsdp_via_efi_by_keyword(FILE * file, const char *keyword) } } - return ((acpi_physical_address) (address)); + return ((acpi_physical_address)(address)); } /****************************************************************************** @@ -751,10 +751,10 @@ static acpi_status osl_list_bios_tables(void) for (i = 0; i < number_of_tables; ++i, table_data += item_size) { if (osl_can_use_xsdt()) { table_address = - (acpi_physical_address) (*ACPI_CAST64(table_data)); + (acpi_physical_address)(*ACPI_CAST64(table_data)); } else { table_address = - (acpi_physical_address) (*ACPI_CAST32(table_data)); + (acpi_physical_address)(*ACPI_CAST32(table_data)); } /* Skip NULL entries in RSDT/XSDT */ @@ -800,7 +800,7 @@ static acpi_status osl_get_bios_table(char *signature, u32 instance, struct acpi_table_header **table, - acpi_physical_address * address) + acpi_physical_address *address) { struct acpi_table_header *local_table = NULL; struct acpi_table_header *mapped_table = NULL; @@ -833,38 +833,37 @@ osl_get_bios_table(char *signature, if ((gbl_fadt->header.length >= MIN_FADT_FOR_XDSDT) && gbl_fadt->Xdsdt) { table_address = - (acpi_physical_address) gbl_fadt->Xdsdt; + (acpi_physical_address)gbl_fadt->Xdsdt; } else if ((gbl_fadt->header.length >= MIN_FADT_FOR_DSDT) && gbl_fadt->dsdt) { table_address = - (acpi_physical_address) gbl_fadt->dsdt; + (acpi_physical_address)gbl_fadt->dsdt; } } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) { if ((gbl_fadt->header.length >= MIN_FADT_FOR_XFACS) && gbl_fadt->Xfacs) { table_address = - (acpi_physical_address) gbl_fadt->Xfacs; + (acpi_physical_address)gbl_fadt->Xfacs; } else if ((gbl_fadt->header.length >= MIN_FADT_FOR_FACS) && gbl_fadt->facs) { table_address = - (acpi_physical_address) gbl_fadt->facs; + (acpi_physical_address)gbl_fadt->facs; } } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT)) { if (!gbl_revision) { return (AE_BAD_SIGNATURE); } table_address = - (acpi_physical_address) gbl_rsdp. + (acpi_physical_address)gbl_rsdp. xsdt_physical_address; } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT)) { table_address = - (acpi_physical_address) gbl_rsdp. + (acpi_physical_address)gbl_rsdp. rsdt_physical_address; } else { - table_address = - (acpi_physical_address) gbl_rsdp_address; + table_address = (acpi_physical_address)gbl_rsdp_address; signature = ACPI_SIG_RSDP; } @@ -904,12 +903,12 @@ osl_get_bios_table(char *signature, for (i = 0; i < number_of_tables; ++i, table_data += item_size) { if (osl_can_use_xsdt()) { table_address = - (acpi_physical_address) (*ACPI_CAST64 - (table_data)); + (acpi_physical_address)(*ACPI_CAST64 + (table_data)); } else { table_address = - (acpi_physical_address) (*ACPI_CAST32 - (table_data)); + (acpi_physical_address)(*ACPI_CAST32 + (table_data)); } /* Skip NULL entries in RSDT/XSDT */ @@ -1301,7 +1300,7 @@ osl_get_customized_table(char *pathname, char *signature, u32 instance, struct acpi_table_header **table, - acpi_physical_address * address) + acpi_physical_address *address) { void *table_dir; u32 current_instance = 0; diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 3818fd07e50f..cbfbce18783d 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap") #ifndef O_BINARY #define O_BINARY 0 #endif -#if defined(_dragon_fly) || defined(_free_BSD) +#if defined(_dragon_fly) || defined(_free_BSD) || defined(_QNX) #define MMAP_FLAGS MAP_SHARED #else #define MMAP_FLAGS MAP_PRIVATE diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 08cb8b2035f2..88aa66ef4ad5 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -246,8 +246,8 @@ acpi_physical_address acpi_os_get_root_pointer(void) *****************************************************************************/ acpi_status -acpi_os_predefined_override(const struct acpi_predefined_names * init_val, - acpi_string * new_val) +acpi_os_predefined_override(const struct acpi_predefined_names *init_val, + acpi_string *new_val) { if (!init_val || !new_val) { @@ -274,8 +274,8 @@ acpi_os_predefined_override(const struct acpi_predefined_names * init_val, *****************************************************************************/ acpi_status -acpi_os_table_override(struct acpi_table_header * existing_table, - struct acpi_table_header ** new_table) +acpi_os_table_override(struct acpi_table_header *existing_table, + struct acpi_table_header **new_table) { if (!existing_table || !new_table) { @@ -311,8 +311,8 @@ acpi_os_table_override(struct acpi_table_header * existing_table, *****************************************************************************/ acpi_status -acpi_os_physical_table_override(struct acpi_table_header * existing_table, - acpi_physical_address * new_address, +acpi_os_physical_table_override(struct acpi_table_header *existing_table, + acpi_physical_address *new_address, u32 *new_table_length) { @@ -506,7 +506,7 @@ acpi_status acpi_os_get_line(char *buffer, u32 buffer_length, u32 *bytes_read) void *acpi_os_map_memory(acpi_physical_address where, acpi_size length) { - return (ACPI_TO_POINTER((acpi_size) where)); + return (ACPI_TO_POINTER((acpi_size)where)); } /****************************************************************************** @@ -603,9 +603,9 @@ void acpi_os_free(void *mem) acpi_status acpi_os_create_semaphore(u32 max_units, - u32 initial_units, acpi_handle * out_handle) + u32 initial_units, acpi_handle *out_handle) { - *out_handle = (acpi_handle) 1; + *out_handle = (acpi_handle)1; return (AE_OK); } @@ -640,7 +640,7 @@ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) acpi_status acpi_os_create_semaphore(u32 max_units, - u32 initial_units, acpi_handle * out_handle) + u32 initial_units, acpi_handle *out_handle) { sem_t *sem; @@ -672,7 +672,7 @@ acpi_os_create_semaphore(u32 max_units, } #endif - *out_handle = (acpi_handle) sem; + *out_handle = (acpi_handle)sem; return (AE_OK); } @@ -1035,7 +1035,7 @@ acpi_os_read_pci_configuration(struct acpi_pci_id *pci_id, *****************************************************************************/ acpi_status -acpi_os_write_pci_configuration(struct acpi_pci_id * pci_id, +acpi_os_write_pci_configuration(struct acpi_pci_id *pci_id, u32 pci_register, u64 value, u32 width) { diff --git a/tools/power/acpi/tools/acpidbg/acpidbg.c b/tools/power/acpi/tools/acpidbg/acpidbg.c index d070fccdba6d..a88ac45b7756 100644 --- a/tools/power/acpi/tools/acpidbg/acpidbg.c +++ b/tools/power/acpi/tools/acpidbg/acpidbg.c @@ -375,7 +375,7 @@ void usage(FILE *file, char *progname) int main(int argc, char **argv) { - int fd = 0; + int fd = -1; int ch; int len; int ret = EXIT_SUCCESS; @@ -430,7 +430,7 @@ int main(int argc, char **argv) acpi_aml_loop(fd); exit: - if (fd < 0) + if (fd >= 0) close(fd); if (acpi_aml_batch_cmd) free(acpi_aml_batch_cmd); diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile index 8d761576e91b..2942cdced2ad 100644 --- a/tools/power/acpi/tools/acpidump/Makefile +++ b/tools/power/acpi/tools/acpidump/Makefile @@ -31,6 +31,7 @@ TOOL_OBJS = \ osunixxf.o\ tbprint.o\ tbxfroot.o\ + utascii.o\ utbuffer.o\ utdebug.o\ utexcep.o\ diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index da44458d3b6c..fb8f1d9e3b1b 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -68,7 +68,7 @@ u8 ap_is_valid_header(struct acpi_table_header *table) /* Make sure signature is all ASCII and a valid ACPI name */ - if (!acpi_ut_valid_acpi_name(table->signature)) { + if (!acpi_ut_valid_nameseg(table->signature)) { acpi_log_error("Table signature (0x%8.8X) is invalid\n", *(u32 *)table->signature); return (FALSE); @@ -286,14 +286,15 @@ int ap_dump_table_by_address(char *ascii_address) /* Convert argument to an integer physical address */ - status = acpi_ut_strtoul64(ascii_address, 0, &long_address); + status = acpi_ut_strtoul64(ascii_address, ACPI_ANY_BASE, + ACPI_MAX64_BYTE_WIDTH, &long_address); if (ACPI_FAILURE(status)) { acpi_log_error("%s: Could not convert to a physical address\n", ascii_address); return (-1); } - address = (acpi_physical_address) long_address; + address = (acpi_physical_address)long_address; status = acpi_os_get_table_by_address(address, &table); if (ACPI_FAILURE(status)) { acpi_log_error("Could not get table at 0x%8.8X%8.8X, %s\n", @@ -406,6 +407,12 @@ int ap_dump_table_from_file(char *pathname) return (-1); } + if (!acpi_ut_valid_nameseg(table->signature)) { + acpi_log_error + ("No valid ACPI signature was found in input file %s\n", + pathname); + } + /* File must be at least as long as the table length */ if (table->length > file_size) { diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index c3c09152fac6..7692e6b887e1 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -209,7 +209,8 @@ static int ap_do_options(int argc, char **argv) case 'r': /* Dump tables from specified RSDP */ status = - acpi_ut_strtoul64(acpi_gbl_optarg, 0, + acpi_ut_strtoul64(acpi_gbl_optarg, ACPI_ANY_BASE, + ACPI_MAX64_BYTE_WIDTH, &gbl_rsdp_base); if (ACPI_FAILURE(status)) { acpi_log_error diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 0adaf0c7c03a..8358863259c5 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -63,7 +63,7 @@ DESTDIR ?= # and _should_ modify the PACKAGE_BUGREPORT definition VERSION= $(shell ./utils/version-gen.sh) -LIB_MAJ= 0.0.0 +LIB_MAJ= 0.0.1 LIB_MIN= 0 PACKAGE = cpupower @@ -129,7 +129,7 @@ WARNINGS += -Wshadow CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \ -DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE -UTIL_OBJS = utils/helpers/amd.o utils/helpers/topology.o utils/helpers/msr.o \ +UTIL_OBJS = utils/helpers/amd.o utils/helpers/msr.o \ utils/helpers/sysfs.o utils/helpers/misc.o utils/helpers/cpuid.o \ utils/helpers/pci.o utils/helpers/bitmask.o \ utils/idle_monitor/nhm_idle.o utils/idle_monitor/snb_idle.o \ @@ -148,9 +148,9 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \ utils/helpers/bitmask.h \ utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def -LIB_HEADERS = lib/cpufreq.h lib/sysfs.h -LIB_SRC = lib/cpufreq.c lib/sysfs.c -LIB_OBJS = lib/cpufreq.o lib/sysfs.o +LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h +LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c +LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) CFLAGS += -pipe @@ -280,6 +280,7 @@ install-lib: $(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/ $(INSTALL) -d $(DESTDIR)${includedir} $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h + $(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h install-tools: $(INSTALL) -d $(DESTDIR)${bindir} @@ -315,6 +316,7 @@ endif uninstall: - rm -f $(DESTDIR)${libdir}/libcpupower.* - rm -f $(DESTDIR)${includedir}/cpufreq.h + - rm -f $(DESTDIR)${includedir}/cpuidle.h - rm -f $(DESTDIR)${bindir}/utils/cpupower - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index d0f879b223fc..3e59f1aa3947 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -22,7 +22,7 @@ $(OUTPUT)%.o : %.c $(OUTPUT)cpufreq-bench: $(OBJS) $(ECHO) " CC " $@ - $(QUIET) $(CC) -o $@ $(CFLAGS) $(OBJS) $(LIBS) + $(QUIET) $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) all: $(OUTPUT)cpufreq-bench diff --git a/tools/power/cpupower/bench/README-BENCH b/tools/power/cpupower/bench/README-BENCH index 8093ec738170..97727aed61cc 100644 --- a/tools/power/cpupower/bench/README-BENCH +++ b/tools/power/cpupower/bench/README-BENCH @@ -113,7 +113,7 @@ cpufreq-bench Command Usage -c, --cpu=<unsigned int> CPU Number to use, starting at 0 -p, --prio=<priority> scheduler priority, HIGH, LOW or DEFAULT -g, --governor=<governor> cpufreq governor to test --n, --cycles=<int> load/sleep cycles to get an avarage value to compare +-n, --cycles=<int> load/sleep cycles to get an average value to compare -r, --rounds<int> load/sleep rounds -f, --file=<configfile> config file to use -o, --output=<dir> output dir, must exist diff --git a/tools/power/cpupower/bench/benchmark.c b/tools/power/cpupower/bench/benchmark.c index 81b1c48607d9..429d51ab8031 100644 --- a/tools/power/cpupower/bench/benchmark.c +++ b/tools/power/cpupower/bench/benchmark.c @@ -130,7 +130,7 @@ void start_benchmark(struct config *config) _round, load_time, sleep_time); if (config->verbose) - printf("avarage: %lius, rps:%li\n", + printf("average: %lius, rps:%li\n", load_time / calculations, 1000000 * calculations / load_time); @@ -177,7 +177,7 @@ void start_benchmark(struct config *config) progress_time += sleep_time + load_time; - /* compare the avarage sleep/load cycles */ + /* compare the average sleep/load cycles */ fprintf(config->output, "%li ", powersave_time / config->cycles); fprintf(config->output, "%.3f\n", diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index f503fb53824e..9b65f052081f 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -65,7 +65,7 @@ FILE *prepare_output(const char *dirname) { FILE *output = NULL; int len; - char *filename; + char *filename, *filename_tmp; struct utsname sysdata; DIR *dir; @@ -81,16 +81,22 @@ FILE *prepare_output(const char *dirname) len = strlen(dirname) + 30; filename = malloc(sizeof(char) * len); + if (!filename) { + perror("malloc"); + goto out_dir; + } if (uname(&sysdata) == 0) { len += strlen(sysdata.nodename) + strlen(sysdata.release); - filename = realloc(filename, sizeof(char) * len); + filename_tmp = realloc(filename, sizeof(*filename) * len); - if (filename == NULL) { + if (filename_tmp == NULL) { + free(filename); perror("realloc"); - return NULL; + goto out_dir; } + filename = filename_tmp; snprintf(filename, len - 1, "%s/benchmark_%s_%s_%li.log", dirname, sysdata.nodename, sysdata.release, time(NULL)); } else { @@ -104,12 +110,16 @@ FILE *prepare_output(const char *dirname) if (output == NULL) { perror("fopen"); fprintf(stderr, "error: unable to open logfile\n"); + goto out; } fprintf(stdout, "Logfile: %s\n", filename); - free(filename); fprintf(output, "#round load sleep performance powersave percentage\n"); +out: + free(filename); +out_dir: + closedir(dir); return output; } diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index f01e3f4be84c..c25a74ae51ba 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -26,6 +26,7 @@ #include <sched.h> #include <cpufreq.h> +#include <cpupower.h> #include "config.h" #include "system.h" @@ -60,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpufreq_cpu_exists(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 0) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index d961101d1cea..1b993fe1ce23 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -9,28 +9,190 @@ #include <errno.h> #include <stdlib.h> #include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> #include "cpufreq.h" -#include "sysfs.h" +#include "cpupower_intern.h" -int cpufreq_cpu_exists(unsigned int cpu) +/* CPUFREQ sysfs access **************************************************/ + +/* helper function to read file from /sys into given buffer */ +/* fname is a relative path under "cpuX/cpufreq" dir */ +static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname, + char *buf, size_t buflen) { - return sysfs_cpu_exists(cpu); + char path[SYSFS_PATH_MAX]; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", + cpu, fname); + return sysfs_read_file(path, buf, buflen); } +/* helper function to write a new value to a /sys file */ +/* fname is a relative path under "cpuX/cpufreq" dir */ +static unsigned int sysfs_cpufreq_write_file(unsigned int cpu, + const char *fname, + const char *value, size_t len) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numwrite; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", + cpu, fname); + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwrite = write(fd, value, len); + if (numwrite < 1) { + close(fd); + return 0; + } + + close(fd); + + return (unsigned int) numwrite; +} + +/* read access to files which contain one numeric value */ + +enum cpufreq_value { + CPUINFO_CUR_FREQ, + CPUINFO_MIN_FREQ, + CPUINFO_MAX_FREQ, + CPUINFO_LATENCY, + SCALING_CUR_FREQ, + SCALING_MIN_FREQ, + SCALING_MAX_FREQ, + STATS_NUM_TRANSITIONS, + MAX_CPUFREQ_VALUE_READ_FILES +}; + +static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = { + [CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq", + [CPUINFO_MIN_FREQ] = "cpuinfo_min_freq", + [CPUINFO_MAX_FREQ] = "cpuinfo_max_freq", + [CPUINFO_LATENCY] = "cpuinfo_transition_latency", + [SCALING_CUR_FREQ] = "scaling_cur_freq", + [SCALING_MIN_FREQ] = "scaling_min_freq", + [SCALING_MAX_FREQ] = "scaling_max_freq", + [STATS_NUM_TRANSITIONS] = "stats/total_trans" +}; + + +static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, + enum cpufreq_value which) +{ + unsigned long value; + unsigned int len; + char linebuf[MAX_LINE_LEN]; + char *endp; + + if (which >= MAX_CPUFREQ_VALUE_READ_FILES) + return 0; + + len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which], + linebuf, sizeof(linebuf)); + + if (len == 0) + return 0; + + value = strtoul(linebuf, &endp, 0); + + if (endp == linebuf || errno == ERANGE) + return 0; + + return value; +} + +/* read access to files which contain one string */ + +enum cpufreq_string { + SCALING_DRIVER, + SCALING_GOVERNOR, + MAX_CPUFREQ_STRING_FILES +}; + +static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = { + [SCALING_DRIVER] = "scaling_driver", + [SCALING_GOVERNOR] = "scaling_governor", +}; + + +static char *sysfs_cpufreq_get_one_string(unsigned int cpu, + enum cpufreq_string which) +{ + char linebuf[MAX_LINE_LEN]; + char *result; + unsigned int len; + + if (which >= MAX_CPUFREQ_STRING_FILES) + return NULL; + + len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + result = strdup(linebuf); + if (result == NULL) + return NULL; + + if (result[strlen(result) - 1] == '\n') + result[strlen(result) - 1] = '\0'; + + return result; +} + +/* write access */ + +enum cpufreq_write { + WRITE_SCALING_MIN_FREQ, + WRITE_SCALING_MAX_FREQ, + WRITE_SCALING_GOVERNOR, + WRITE_SCALING_SET_SPEED, + MAX_CPUFREQ_WRITE_FILES +}; + +static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = { + [WRITE_SCALING_MIN_FREQ] = "scaling_min_freq", + [WRITE_SCALING_MAX_FREQ] = "scaling_max_freq", + [WRITE_SCALING_GOVERNOR] = "scaling_governor", + [WRITE_SCALING_SET_SPEED] = "scaling_setspeed", +}; + +static int sysfs_cpufreq_write_one_value(unsigned int cpu, + enum cpufreq_write which, + const char *new_value, size_t len) +{ + if (which >= MAX_CPUFREQ_WRITE_FILES) + return 0; + + if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which], + new_value, len) != len) + return -ENODEV; + + return 0; +}; + unsigned long cpufreq_get_freq_kernel(unsigned int cpu) { - return sysfs_get_freq_kernel(cpu); + return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ); } unsigned long cpufreq_get_freq_hardware(unsigned int cpu) { - return sysfs_get_freq_hardware(cpu); + return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ); } unsigned long cpufreq_get_transition_latency(unsigned int cpu) { - return sysfs_get_freq_transition_latency(cpu); + return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY); } int cpufreq_get_hardware_limits(unsigned int cpu, @@ -39,12 +201,21 @@ int cpufreq_get_hardware_limits(unsigned int cpu, { if ((!min) || (!max)) return -EINVAL; - return sysfs_get_freq_hardware_limits(cpu, min, max); + + *min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ); + if (!*min) + return -ENODEV; + + *max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ); + if (!*max) + return -ENODEV; + + return 0; } char *cpufreq_get_driver(unsigned int cpu) { - return sysfs_get_freq_driver(cpu); + return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER); } void cpufreq_put_driver(char *ptr) @@ -56,7 +227,26 @@ void cpufreq_put_driver(char *ptr) struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu) { - return sysfs_get_freq_policy(cpu); + struct cpufreq_policy *policy; + + policy = malloc(sizeof(struct cpufreq_policy)); + if (!policy) + return NULL; + + policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR); + if (!policy->governor) { + free(policy); + return NULL; + } + policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ); + policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ); + if ((!policy->min) || (!policy->max)) { + free(policy->governor); + free(policy); + return NULL; + } + + return policy; } void cpufreq_put_policy(struct cpufreq_policy *policy) @@ -72,7 +262,57 @@ void cpufreq_put_policy(struct cpufreq_policy *policy) struct cpufreq_available_governors *cpufreq_get_available_governors(unsigned int cpu) { - return sysfs_get_freq_available_governors(cpu); + struct cpufreq_available_governors *first = NULL; + struct cpufreq_available_governors *current = NULL; + char linebuf[MAX_LINE_LEN]; + unsigned int pos, i; + unsigned int len; + + len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors", + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + pos = 0; + for (i = 0; i < len; i++) { + if (linebuf[i] == ' ' || linebuf[i] == '\n') { + if (i - pos < 2) + continue; + if (current) { + current->next = malloc(sizeof(*current)); + if (!current->next) + goto error_out; + current = current->next; + } else { + first = malloc(sizeof(*first)); + if (!first) + goto error_out; + current = first; + } + current->first = first; + current->next = NULL; + + current->governor = malloc(i - pos + 1); + if (!current->governor) + goto error_out; + + memcpy(current->governor, linebuf + pos, i - pos); + current->governor[i - pos] = '\0'; + pos = i + 1; + } + } + + return first; + + error_out: + while (first) { + current = first->next; + if (first->governor) + free(first->governor); + free(first); + first = current; + } + return NULL; } void cpufreq_put_available_governors(struct cpufreq_available_governors *any) @@ -96,7 +336,57 @@ void cpufreq_put_available_governors(struct cpufreq_available_governors *any) struct cpufreq_available_frequencies *cpufreq_get_available_frequencies(unsigned int cpu) { - return sysfs_get_available_frequencies(cpu); + struct cpufreq_available_frequencies *first = NULL; + struct cpufreq_available_frequencies *current = NULL; + char one_value[SYSFS_PATH_MAX]; + char linebuf[MAX_LINE_LEN]; + unsigned int pos, i; + unsigned int len; + + len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies", + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + pos = 0; + for (i = 0; i < len; i++) { + if (linebuf[i] == ' ' || linebuf[i] == '\n') { + if (i - pos < 2) + continue; + if (i - pos >= SYSFS_PATH_MAX) + goto error_out; + if (current) { + current->next = malloc(sizeof(*current)); + if (!current->next) + goto error_out; + current = current->next; + } else { + first = malloc(sizeof(*first)); + if (!first) + goto error_out; + current = first; + } + current->first = first; + current->next = NULL; + + memcpy(one_value, linebuf + pos, i - pos); + one_value[i - pos] = '\0'; + if (sscanf(one_value, "%lu", ¤t->frequency) != 1) + goto error_out; + + pos = i + 1; + } + } + + return first; + + error_out: + while (first) { + current = first->next; + free(first); + first = current; + } + return NULL; } void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies @@ -114,10 +404,65 @@ void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies } } +static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu, + const char *file) +{ + struct cpufreq_affected_cpus *first = NULL; + struct cpufreq_affected_cpus *current = NULL; + char one_value[SYSFS_PATH_MAX]; + char linebuf[MAX_LINE_LEN]; + unsigned int pos, i; + unsigned int len; + + len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + pos = 0; + for (i = 0; i < len; i++) { + if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') { + if (i - pos < 1) + continue; + if (i - pos >= SYSFS_PATH_MAX) + goto error_out; + if (current) { + current->next = malloc(sizeof(*current)); + if (!current->next) + goto error_out; + current = current->next; + } else { + first = malloc(sizeof(*first)); + if (!first) + goto error_out; + current = first; + } + current->first = first; + current->next = NULL; + + memcpy(one_value, linebuf + pos, i - pos); + one_value[i - pos] = '\0'; + + if (sscanf(one_value, "%u", ¤t->cpu) != 1) + goto error_out; + + pos = i + 1; + } + } + + return first; + + error_out: + while (first) { + current = first->next; + free(first); + first = current; + } + return NULL; +} struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned int cpu) { - return sysfs_get_freq_affected_cpus(cpu); + return sysfs_get_cpu_list(cpu, "affected_cpus"); } void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *any) @@ -138,7 +483,7 @@ void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *any) struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned int cpu) { - return sysfs_get_freq_related_cpus(cpu); + return sysfs_get_cpu_list(cpu, "related_cpus"); } void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *any) @@ -146,45 +491,208 @@ void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *any) cpufreq_put_affected_cpus(any); } +static int verify_gov(char *new_gov, char *passed_gov) +{ + unsigned int i, j = 0; + + if (!passed_gov || (strlen(passed_gov) > 19)) + return -EINVAL; + + strncpy(new_gov, passed_gov, 20); + for (i = 0; i < 20; i++) { + if (j) { + new_gov[i] = '\0'; + continue; + } + if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z')) + continue; + + if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z')) + continue; + + if (new_gov[i] == '-') + continue; + + if (new_gov[i] == '_') + continue; + + if (new_gov[i] == '\0') { + j = 1; + continue; + } + return -EINVAL; + } + new_gov[19] = '\0'; + return 0; +} int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy) { + char min[SYSFS_PATH_MAX]; + char max[SYSFS_PATH_MAX]; + char gov[SYSFS_PATH_MAX]; + int ret; + unsigned long old_min; + int write_max_first; + if (!policy || !(policy->governor)) return -EINVAL; - return sysfs_set_freq_policy(cpu, policy); + if (policy->max < policy->min) + return -EINVAL; + + if (verify_gov(gov, policy->governor)) + return -EINVAL; + + snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min); + snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max); + + old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ); + write_max_first = (old_min && (policy->max < old_min) ? 0 : 1); + + if (write_max_first) { + ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ, + max, strlen(max)); + if (ret) + return ret; + } + + ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min, + strlen(min)); + if (ret) + return ret; + + if (!write_max_first) { + ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ, + max, strlen(max)); + if (ret) + return ret; + } + + return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR, + gov, strlen(gov)); } int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq) { - return sysfs_modify_freq_policy_min(cpu, min_freq); + char value[SYSFS_PATH_MAX]; + + snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq); + + return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, + value, strlen(value)); } int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq) { - return sysfs_modify_freq_policy_max(cpu, max_freq); -} + char value[SYSFS_PATH_MAX]; + + snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq); + return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ, + value, strlen(value)); +} int cpufreq_modify_policy_governor(unsigned int cpu, char *governor) { + char new_gov[SYSFS_PATH_MAX]; + if ((!governor) || (strlen(governor) > 19)) return -EINVAL; - return sysfs_modify_freq_policy_governor(cpu, governor); + if (verify_gov(new_gov, governor)) + return -EINVAL; + + return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR, + new_gov, strlen(new_gov)); } int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency) { - return sysfs_set_frequency(cpu, target_frequency); + struct cpufreq_policy *pol = cpufreq_get_policy(cpu); + char userspace_gov[] = "userspace"; + char freq[SYSFS_PATH_MAX]; + int ret; + + if (!pol) + return -ENODEV; + + if (strncmp(pol->governor, userspace_gov, 9) != 0) { + ret = cpufreq_modify_policy_governor(cpu, userspace_gov); + if (ret) { + cpufreq_put_policy(pol); + return ret; + } + } + + cpufreq_put_policy(pol); + + snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency); + + return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED, + freq, strlen(freq)); } struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu, unsigned long long *total_time) { - return sysfs_get_freq_stats(cpu, total_time); + struct cpufreq_stats *first = NULL; + struct cpufreq_stats *current = NULL; + char one_value[SYSFS_PATH_MAX]; + char linebuf[MAX_LINE_LEN]; + unsigned int pos, i; + unsigned int len; + + len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state", + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + *total_time = 0; + pos = 0; + for (i = 0; i < len; i++) { + if (i == strlen(linebuf) || linebuf[i] == '\n') { + if (i - pos < 2) + continue; + if ((i - pos) >= SYSFS_PATH_MAX) + goto error_out; + if (current) { + current->next = malloc(sizeof(*current)); + if (!current->next) + goto error_out; + current = current->next; + } else { + first = malloc(sizeof(*first)); + if (!first) + goto error_out; + current = first; + } + current->first = first; + current->next = NULL; + + memcpy(one_value, linebuf + pos, i - pos); + one_value[i - pos] = '\0'; + if (sscanf(one_value, "%lu %llu", + ¤t->frequency, + ¤t->time_in_state) != 2) + goto error_out; + + *total_time = *total_time + current->time_in_state; + pos = i + 1; + } + } + + return first; + + error_out: + while (first) { + current = first->next; + free(first); + first = current; + } + return NULL; } void cpufreq_put_stats(struct cpufreq_stats *any) @@ -204,5 +712,5 @@ void cpufreq_put_stats(struct cpufreq_stats *any) unsigned long cpufreq_get_transitions(unsigned int cpu) { - return sysfs_get_freq_transitions(cpu); + return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS); } diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index 3aae8e7a0839..3b005c39f068 100644 --- a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -17,8 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _CPUFREQ_H -#define _CPUFREQ_H 1 +#ifndef __CPUPOWER_CPUFREQ_H__ +#define __CPUPOWER_CPUFREQ_H__ struct cpufreq_policy { unsigned long min; @@ -58,13 +58,6 @@ struct cpufreq_stats { extern "C" { #endif -/* - * returns 0 if the specified CPU is present (it doesn't say - * whether it is online!), and an error value if not. - */ - -extern int cpufreq_cpu_exists(unsigned int cpu); - /* determine current CPU frequency * - _kernel variant means kernel's opinion of CPU frequency * - _hardware variant means actual hardware CPU frequency, @@ -73,9 +66,9 @@ extern int cpufreq_cpu_exists(unsigned int cpu); * returns 0 on failure, else frequency in kHz. */ -extern unsigned long cpufreq_get_freq_kernel(unsigned int cpu); +unsigned long cpufreq_get_freq_kernel(unsigned int cpu); -extern unsigned long cpufreq_get_freq_hardware(unsigned int cpu); +unsigned long cpufreq_get_freq_hardware(unsigned int cpu); #define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu); @@ -84,7 +77,7 @@ extern unsigned long cpufreq_get_freq_hardware(unsigned int cpu); * * returns 0 on failure, else transition latency in 10^(-9) s = nanoseconds */ -extern unsigned long cpufreq_get_transition_latency(unsigned int cpu); +unsigned long cpufreq_get_transition_latency(unsigned int cpu); /* determine hardware CPU frequency limits @@ -93,7 +86,7 @@ extern unsigned long cpufreq_get_transition_latency(unsigned int cpu); * considerations by cpufreq policy notifiers in the kernel. */ -extern int cpufreq_get_hardware_limits(unsigned int cpu, +int cpufreq_get_hardware_limits(unsigned int cpu, unsigned long *min, unsigned long *max); @@ -104,9 +97,9 @@ extern int cpufreq_get_hardware_limits(unsigned int cpu, * to avoid memory leakage, please. */ -extern char *cpufreq_get_driver(unsigned int cpu); +char *cpufreq_get_driver(unsigned int cpu); -extern void cpufreq_put_driver(char *ptr); +void cpufreq_put_driver(char *ptr); /* determine CPUfreq policy currently used @@ -116,9 +109,9 @@ extern void cpufreq_put_driver(char *ptr); */ -extern struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu); +struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu); -extern void cpufreq_put_policy(struct cpufreq_policy *policy); +void cpufreq_put_policy(struct cpufreq_policy *policy); /* determine CPUfreq governors currently available @@ -129,10 +122,10 @@ extern void cpufreq_put_policy(struct cpufreq_policy *policy); */ -extern struct cpufreq_available_governors +struct cpufreq_available_governors *cpufreq_get_available_governors(unsigned int cpu); -extern void cpufreq_put_available_governors( +void cpufreq_put_available_governors( struct cpufreq_available_governors *first); @@ -143,10 +136,10 @@ extern void cpufreq_put_available_governors( * cpufreq_put_available_frequencies after use. */ -extern struct cpufreq_available_frequencies +struct cpufreq_available_frequencies *cpufreq_get_available_frequencies(unsigned int cpu); -extern void cpufreq_put_available_frequencies( +void cpufreq_put_available_frequencies( struct cpufreq_available_frequencies *first); @@ -156,10 +149,10 @@ extern void cpufreq_put_available_frequencies( * to avoid memory leakage, please. */ -extern struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned +struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned int cpu); -extern void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first); +void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first); /* determine related CPUs @@ -168,10 +161,10 @@ extern void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first); * to avoid memory leakage, please. */ -extern struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned +struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned int cpu); -extern void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first); +void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first); /* determine stats for cpufreq subsystem @@ -179,12 +172,12 @@ extern void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first); * This is not available in all kernel versions or configurations. */ -extern struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu, +struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu, unsigned long long *total_time); -extern void cpufreq_put_stats(struct cpufreq_stats *stats); +void cpufreq_put_stats(struct cpufreq_stats *stats); -extern unsigned long cpufreq_get_transitions(unsigned int cpu); +unsigned long cpufreq_get_transitions(unsigned int cpu); /* set new cpufreq policy @@ -193,7 +186,7 @@ extern unsigned long cpufreq_get_transitions(unsigned int cpu); * but results may differ depending e.g. on governors being available. */ -extern int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); +int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); /* modify a policy by only changing min/max freq or governor @@ -201,9 +194,9 @@ extern int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); * Does not check whether result is what was intended. */ -extern int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq); -extern int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq); -extern int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); +int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq); +int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq); +int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); /* set a specific frequency @@ -213,7 +206,7 @@ extern int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); * occurs. Also does not work on ->range() cpufreq drivers. */ -extern int cpufreq_set_frequency(unsigned int cpu, +int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency); #ifdef __cplusplus diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c new file mode 100644 index 000000000000..9bd4c7655fdb --- /dev/null +++ b/tools/power/cpupower/lib/cpuidle.c @@ -0,0 +1,380 @@ +/* + * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de> + * (C) 2011 Thomas Renninger <trenn@novell.com> Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + */ + +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "cpuidle.h" +#include "cpupower_intern.h" + +/* + * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir + * exists. + * For example the functionality to disable c-states was introduced in later + * kernel versions, this function can be used to explicitly check for this + * feature. + * + * returns 1 if the file exists, 0 otherwise. + */ +static +unsigned int cpuidle_state_file_exists(unsigned int cpu, + unsigned int idlestate, + const char *fname) +{ + char path[SYSFS_PATH_MAX]; + struct stat statbuf; + + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + if (stat(path, &statbuf) != 0) + return 0; + return 1; +} + +/* + * helper function to read file from /sys into given buffer + * fname is a relative path under "cpuX/cpuidle/stateX/" dir + * cstates starting with 0, C0 is not counted as cstate. + * This means if you want C1 info, pass 0 as idlestate param + */ +static +unsigned int cpuidle_state_read_file(unsigned int cpu, + unsigned int idlestate, + const char *fname, char *buf, + size_t buflen) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numread; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +/* + * helper function to write a new value to a /sys file + * fname is a relative path under "../cpuX/cpuidle/cstateY/" dir + * + * Returns the number of bytes written or 0 on error + */ +static +unsigned int cpuidle_state_write_file(unsigned int cpu, + unsigned int idlestate, + const char *fname, + const char *value, size_t len) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numwrite; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwrite = write(fd, value, len); + if (numwrite < 1) { + close(fd); + return 0; + } + + close(fd); + + return (unsigned int) numwrite; +} + +/* read access to files which contain one numeric value */ + +enum idlestate_value { + IDLESTATE_USAGE, + IDLESTATE_POWER, + IDLESTATE_LATENCY, + IDLESTATE_TIME, + IDLESTATE_DISABLE, + MAX_IDLESTATE_VALUE_FILES +}; + +static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = { + [IDLESTATE_USAGE] = "usage", + [IDLESTATE_POWER] = "power", + [IDLESTATE_LATENCY] = "latency", + [IDLESTATE_TIME] = "time", + [IDLESTATE_DISABLE] = "disable", +}; + +static +unsigned long long cpuidle_state_get_one_value(unsigned int cpu, + unsigned int idlestate, + enum idlestate_value which) +{ + unsigned long long value; + unsigned int len; + char linebuf[MAX_LINE_LEN]; + char *endp; + + if (which >= MAX_IDLESTATE_VALUE_FILES) + return 0; + + len = cpuidle_state_read_file(cpu, idlestate, + idlestate_value_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return 0; + + value = strtoull(linebuf, &endp, 0); + + if (endp == linebuf || errno == ERANGE) + return 0; + + return value; +} + +/* read access to files which contain one string */ + +enum idlestate_string { + IDLESTATE_DESC, + IDLESTATE_NAME, + MAX_IDLESTATE_STRING_FILES +}; + +static const char *idlestate_string_files[MAX_IDLESTATE_STRING_FILES] = { + [IDLESTATE_DESC] = "desc", + [IDLESTATE_NAME] = "name", +}; + + +static char *cpuidle_state_get_one_string(unsigned int cpu, + unsigned int idlestate, + enum idlestate_string which) +{ + char linebuf[MAX_LINE_LEN]; + char *result; + unsigned int len; + + if (which >= MAX_IDLESTATE_STRING_FILES) + return NULL; + + len = cpuidle_state_read_file(cpu, idlestate, + idlestate_string_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + result = strdup(linebuf); + if (result == NULL) + return NULL; + + if (result[strlen(result) - 1] == '\n') + result[strlen(result) - 1] = '\0'; + + return result; +} + +/* + * Returns: + * 1 if disabled + * 0 if enabled + * -1 if idlestate is not available + * -2 if disabling is not supported by the kernel + */ +int cpuidle_is_state_disabled(unsigned int cpu, + unsigned int idlestate) +{ + if (cpuidle_state_count(cpu) <= idlestate) + return -1; + + if (!cpuidle_state_file_exists(cpu, idlestate, + idlestate_value_files[IDLESTATE_DISABLE])) + return -2; + return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_DISABLE); +} + +/* + * Pass 1 as last argument to disable or 0 to enable the state + * Returns: + * 0 on success + * negative values on error, for example: + * -1 if idlestate is not available + * -2 if disabling is not supported by the kernel + * -3 No write access to disable/enable C-states + */ +int cpuidle_state_disable(unsigned int cpu, + unsigned int idlestate, + unsigned int disable) +{ + char value[SYSFS_PATH_MAX]; + int bytes_written; + + if (cpuidle_state_count(cpu) <= idlestate) + return -1; + + if (!cpuidle_state_file_exists(cpu, idlestate, + idlestate_value_files[IDLESTATE_DISABLE])) + return -2; + + snprintf(value, SYSFS_PATH_MAX, "%u", disable); + + bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable", + value, sizeof(disable)); + if (bytes_written) + return 0; + return -3; +} + +unsigned long cpuidle_state_latency(unsigned int cpu, + unsigned int idlestate) +{ + return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY); +} + +unsigned long cpuidle_state_usage(unsigned int cpu, + unsigned int idlestate) +{ + return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_USAGE); +} + +unsigned long long cpuidle_state_time(unsigned int cpu, + unsigned int idlestate) +{ + return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_TIME); +} + +char *cpuidle_state_name(unsigned int cpu, unsigned int idlestate) +{ + return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_NAME); +} + +char *cpuidle_state_desc(unsigned int cpu, unsigned int idlestate) +{ + return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_DESC); +} + +/* + * Returns number of supported C-states of CPU core cpu + * Negativ in error case + * Zero if cpuidle does not export any C-states + */ +unsigned int cpuidle_state_count(unsigned int cpu) +{ + char file[SYSFS_PATH_MAX]; + struct stat statbuf; + int idlestates = 1; + + + snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle"); + if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) + return 0; + + snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu); + if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) + return 0; + + while (stat(file, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) { + snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU + "cpu%u/cpuidle/state%d", cpu, idlestates); + idlestates++; + } + idlestates--; + return idlestates; +} + +/* CPUidle general /sys/devices/system/cpu/cpuidle/ sysfs access ********/ + +/* + * helper function to read file from /sys into given buffer + * fname is a relative path under "cpu/cpuidle/" dir + */ +static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf, + size_t buflen) +{ + char path[SYSFS_PATH_MAX]; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname); + + return sysfs_read_file(path, buf, buflen); +} + + + +/* read access to files which contain one string */ + +enum cpuidle_string { + CPUIDLE_GOVERNOR, + CPUIDLE_GOVERNOR_RO, + CPUIDLE_DRIVER, + MAX_CPUIDLE_STRING_FILES +}; + +static const char *cpuidle_string_files[MAX_CPUIDLE_STRING_FILES] = { + [CPUIDLE_GOVERNOR] = "current_governor", + [CPUIDLE_GOVERNOR_RO] = "current_governor_ro", + [CPUIDLE_DRIVER] = "current_driver", +}; + + +static char *sysfs_cpuidle_get_one_string(enum cpuidle_string which) +{ + char linebuf[MAX_LINE_LEN]; + char *result; + unsigned int len; + + if (which >= MAX_CPUIDLE_STRING_FILES) + return NULL; + + len = sysfs_cpuidle_read_file(cpuidle_string_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + result = strdup(linebuf); + if (result == NULL) + return NULL; + + if (result[strlen(result) - 1] == '\n') + result[strlen(result) - 1] = '\0'; + + return result; +} + +char *cpuidle_get_governor(void) +{ + char *tmp = sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR_RO); + if (!tmp) + return sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR); + else + return tmp; +} + +char *cpuidle_get_driver(void) +{ + return sysfs_cpuidle_get_one_string(CPUIDLE_DRIVER); +} +/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h new file mode 100644 index 000000000000..04eb3cfa6e42 --- /dev/null +++ b/tools/power/cpupower/lib/cpuidle.h @@ -0,0 +1,23 @@ +#ifndef __CPUPOWER_CPUIDLE_H__ +#define __CPUPOWER_CPUIDLE_H__ + +int cpuidle_is_state_disabled(unsigned int cpu, + unsigned int idlestate); +int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, + unsigned int disable); +unsigned long cpuidle_state_latency(unsigned int cpu, + unsigned int idlestate); +unsigned long cpuidle_state_usage(unsigned int cpu, + unsigned int idlestate); +unsigned long long cpuidle_state_time(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_name(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_desc(unsigned int cpu, + unsigned int idlestate); +unsigned int cpuidle_state_count(unsigned int cpu); + +char *cpuidle_get_governor(void); +char *cpuidle_get_driver(void); + +#endif /* __CPUPOWER_HELPERS_SYSFS_H__ */ diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c new file mode 100644 index 000000000000..9c395ec924de --- /dev/null +++ b/tools/power/cpupower/lib/cpupower.c @@ -0,0 +1,192 @@ +/* + * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de> + * + * Licensed under the terms of the GNU GPL License version 2. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> + +#include "cpupower.h" +#include "cpupower_intern.h" + +unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) +{ + int fd; + ssize_t numread; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +/* + * Detect whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * negative errno values in error case + */ +int cpupower_is_cpu_online(unsigned int cpu) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numread; + unsigned long long value; + char linebuf[MAX_LINE_LEN]; + char *endp; + struct stat statbuf; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu); + + if (stat(path, &statbuf) != 0) + return 0; + + /* + * kernel without CONFIG_HOTPLUG_CPU + * -> cpuX directory exists, but not cpuX/online file + */ + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu); + if (stat(path, &statbuf) != 0) + return 1; + + fd = open(path, O_RDONLY); + if (fd == -1) + return -errno; + + numread = read(fd, linebuf, MAX_LINE_LEN - 1); + if (numread < 1) { + close(fd); + return -EIO; + } + linebuf[numread] = '\0'; + close(fd); + + value = strtoull(linebuf, &endp, 0); + if (value > 1) + return -EINVAL; + + return value; +} + +/* returns -1 on failure, 0 on success */ +static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result) +{ + char linebuf[MAX_LINE_LEN]; + char *endp; + char path[SYSFS_PATH_MAX]; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", + cpu, fname); + if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) + return -1; + *result = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + return 0; +} + +static int __compare(const void *t1, const void *t2) +{ + struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1; + struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2; + if (top1->pkg < top2->pkg) + return -1; + else if (top1->pkg > top2->pkg) + return 1; + else if (top1->core < top2->core) + return -1; + else if (top1->core > top2->core) + return 1; + else if (top1->cpu < top2->cpu) + return -1; + else if (top1->cpu > top2->cpu) + return 1; + else + return 0; +} + +/* + * Returns amount of cpus, negative on error, cpu_top must be + * passed to cpu_topology_release to free resources + * + * Array is sorted after ->pkg, ->core, then ->cpu + */ +int get_cpu_topology(struct cpupower_topology *cpu_top) +{ + int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF); + + cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus); + if (cpu_top->core_info == NULL) + return -ENOMEM; + cpu_top->pkgs = cpu_top->cores = 0; + for (cpu = 0; cpu < cpus; cpu++) { + cpu_top->core_info[cpu].cpu = cpu; + cpu_top->core_info[cpu].is_online = cpupower_is_cpu_online(cpu); + if(sysfs_topology_read_file( + cpu, + "physical_package_id", + &(cpu_top->core_info[cpu].pkg)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } + if(sysfs_topology_read_file( + cpu, + "core_id", + &(cpu_top->core_info[cpu].core)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } + } + + qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), + __compare); + + /* Count the number of distinct pkgs values. This works + because the primary sort of the core_info struct was just + done by pkg value. */ + last_pkg = cpu_top->core_info[0].pkg; + for(cpu = 1; cpu < cpus; cpu++) { + if (cpu_top->core_info[cpu].pkg != last_pkg && + cpu_top->core_info[cpu].pkg != -1) { + + last_pkg = cpu_top->core_info[cpu].pkg; + cpu_top->pkgs++; + } + } + if (!(cpu_top->core_info[0].pkg == -1)) + cpu_top->pkgs++; + + /* Intel's cores count is not consecutively numbered, there may + * be a core_id of 3, but none of 2. Assume there always is 0 + * Get amount of cores by counting duplicates in a package + for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) { + if (cpu_top->core_info[cpu].core == 0) + cpu_top->cores++; + */ + return cpus; +} + +void cpu_topology_release(struct cpupower_topology cpu_top) +{ + free(cpu_top.core_info); +} diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h new file mode 100644 index 000000000000..fa031fcc7710 --- /dev/null +++ b/tools/power/cpupower/lib/cpupower.h @@ -0,0 +1,35 @@ +#ifndef __CPUPOWER_CPUPOWER_H__ +#define __CPUPOWER_CPUPOWER_H__ + +struct cpupower_topology { + /* Amount of CPU cores, packages and threads per core in the system */ + unsigned int cores; + unsigned int pkgs; + unsigned int threads; /* per core */ + + /* Array gets mallocated with cores entries, holding per core info */ + struct cpuid_core_info *core_info; +}; + +struct cpuid_core_info { + int pkg; + int core; + int cpu; + + /* flags */ + unsigned int is_online:1; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +int get_cpu_topology(struct cpupower_topology *cpu_top); +void cpu_topology_release(struct cpupower_topology cpu_top); +int cpupower_is_cpu_online(unsigned int cpu); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h new file mode 100644 index 000000000000..f8ec4009621c --- /dev/null +++ b/tools/power/cpupower/lib/cpupower_intern.h @@ -0,0 +1,5 @@ +#define PATH_TO_CPU "/sys/devices/system/cpu/" +#define MAX_LINE_LEN 4096 +#define SYSFS_PATH_MAX 255 + +unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); diff --git a/tools/power/cpupower/lib/sysfs.c b/tools/power/cpupower/lib/sysfs.c deleted file mode 100644 index 870713a75a81..000000000000 --- a/tools/power/cpupower/lib/sysfs.c +++ /dev/null @@ -1,672 +0,0 @@ -/* - * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de> - * - * Licensed under the terms of the GNU GPL License version 2. - */ - -#include <stdio.h> -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <limits.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> - -#include "cpufreq.h" - -#define PATH_TO_CPU "/sys/devices/system/cpu/" -#define MAX_LINE_LEN 4096 -#define SYSFS_PATH_MAX 255 - - -static unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) -{ - int fd; - ssize_t numread; - - fd = open(path, O_RDONLY); - if (fd == -1) - return 0; - - numread = read(fd, buf, buflen - 1); - if (numread < 1) { - close(fd); - return 0; - } - - buf[numread] = '\0'; - close(fd); - - return (unsigned int) numread; -} - - -/* CPUFREQ sysfs access **************************************************/ - -/* helper function to read file from /sys into given buffer */ -/* fname is a relative path under "cpuX/cpufreq" dir */ -static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname, - char *buf, size_t buflen) -{ - char path[SYSFS_PATH_MAX]; - - snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", - cpu, fname); - return sysfs_read_file(path, buf, buflen); -} - -/* helper function to write a new value to a /sys file */ -/* fname is a relative path under "cpuX/cpufreq" dir */ -static unsigned int sysfs_cpufreq_write_file(unsigned int cpu, - const char *fname, - const char *value, size_t len) -{ - char path[SYSFS_PATH_MAX]; - int fd; - ssize_t numwrite; - - snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", - cpu, fname); - - fd = open(path, O_WRONLY); - if (fd == -1) - return 0; - - numwrite = write(fd, value, len); - if (numwrite < 1) { - close(fd); - return 0; - } - - close(fd); - - return (unsigned int) numwrite; -} - -/* read access to files which contain one numeric value */ - -enum cpufreq_value { - CPUINFO_CUR_FREQ, - CPUINFO_MIN_FREQ, - CPUINFO_MAX_FREQ, - CPUINFO_LATENCY, - SCALING_CUR_FREQ, - SCALING_MIN_FREQ, - SCALING_MAX_FREQ, - STATS_NUM_TRANSITIONS, - MAX_CPUFREQ_VALUE_READ_FILES -}; - -static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = { - [CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq", - [CPUINFO_MIN_FREQ] = "cpuinfo_min_freq", - [CPUINFO_MAX_FREQ] = "cpuinfo_max_freq", - [CPUINFO_LATENCY] = "cpuinfo_transition_latency", - [SCALING_CUR_FREQ] = "scaling_cur_freq", - [SCALING_MIN_FREQ] = "scaling_min_freq", - [SCALING_MAX_FREQ] = "scaling_max_freq", - [STATS_NUM_TRANSITIONS] = "stats/total_trans" -}; - - -static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, - enum cpufreq_value which) -{ - unsigned long value; - unsigned int len; - char linebuf[MAX_LINE_LEN]; - char *endp; - - if (which >= MAX_CPUFREQ_VALUE_READ_FILES) - return 0; - - len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which], - linebuf, sizeof(linebuf)); - - if (len == 0) - return 0; - - value = strtoul(linebuf, &endp, 0); - - if (endp == linebuf || errno == ERANGE) - return 0; - - return value; -} - -/* read access to files which contain one string */ - -enum cpufreq_string { - SCALING_DRIVER, - SCALING_GOVERNOR, - MAX_CPUFREQ_STRING_FILES -}; - -static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = { - [SCALING_DRIVER] = "scaling_driver", - [SCALING_GOVERNOR] = "scaling_governor", -}; - - -static char *sysfs_cpufreq_get_one_string(unsigned int cpu, - enum cpufreq_string which) -{ - char linebuf[MAX_LINE_LEN]; - char *result; - unsigned int len; - - if (which >= MAX_CPUFREQ_STRING_FILES) - return NULL; - - len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which], - linebuf, sizeof(linebuf)); - if (len == 0) - return NULL; - - result = strdup(linebuf); - if (result == NULL) - return NULL; - - if (result[strlen(result) - 1] == '\n') - result[strlen(result) - 1] = '\0'; - - return result; -} - -/* write access */ - -enum cpufreq_write { - WRITE_SCALING_MIN_FREQ, - WRITE_SCALING_MAX_FREQ, - WRITE_SCALING_GOVERNOR, - WRITE_SCALING_SET_SPEED, - MAX_CPUFREQ_WRITE_FILES -}; - -static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = { - [WRITE_SCALING_MIN_FREQ] = "scaling_min_freq", - [WRITE_SCALING_MAX_FREQ] = "scaling_max_freq", - [WRITE_SCALING_GOVERNOR] = "scaling_governor", - [WRITE_SCALING_SET_SPEED] = "scaling_setspeed", -}; - -static int sysfs_cpufreq_write_one_value(unsigned int cpu, - enum cpufreq_write which, - const char *new_value, size_t len) -{ - if (which >= MAX_CPUFREQ_WRITE_FILES) - return 0; - - if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which], - new_value, len) != len) - return -ENODEV; - - return 0; -}; - -unsigned long sysfs_get_freq_kernel(unsigned int cpu) -{ - return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ); -} - -unsigned long sysfs_get_freq_hardware(unsigned int cpu) -{ - return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ); -} - -unsigned long sysfs_get_freq_transition_latency(unsigned int cpu) -{ - return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY); -} - -int sysfs_get_freq_hardware_limits(unsigned int cpu, - unsigned long *min, - unsigned long *max) -{ - if ((!min) || (!max)) - return -EINVAL; - - *min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ); - if (!*min) - return -ENODEV; - - *max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ); - if (!*max) - return -ENODEV; - - return 0; -} - -char *sysfs_get_freq_driver(unsigned int cpu) -{ - return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER); -} - -struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu) -{ - struct cpufreq_policy *policy; - - policy = malloc(sizeof(struct cpufreq_policy)); - if (!policy) - return NULL; - - policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR); - if (!policy->governor) { - free(policy); - return NULL; - } - policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ); - policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ); - if ((!policy->min) || (!policy->max)) { - free(policy->governor); - free(policy); - return NULL; - } - - return policy; -} - -struct cpufreq_available_governors * -sysfs_get_freq_available_governors(unsigned int cpu) { - struct cpufreq_available_governors *first = NULL; - struct cpufreq_available_governors *current = NULL; - char linebuf[MAX_LINE_LEN]; - unsigned int pos, i; - unsigned int len; - - len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors", - linebuf, sizeof(linebuf)); - if (len == 0) - return NULL; - - pos = 0; - for (i = 0; i < len; i++) { - if (linebuf[i] == ' ' || linebuf[i] == '\n') { - if (i - pos < 2) - continue; - if (current) { - current->next = malloc(sizeof(*current)); - if (!current->next) - goto error_out; - current = current->next; - } else { - first = malloc(sizeof(*first)); - if (!first) - goto error_out; - current = first; - } - current->first = first; - current->next = NULL; - - current->governor = malloc(i - pos + 1); - if (!current->governor) - goto error_out; - - memcpy(current->governor, linebuf + pos, i - pos); - current->governor[i - pos] = '\0'; - pos = i + 1; - } - } - - return first; - - error_out: - while (first) { - current = first->next; - if (first->governor) - free(first->governor); - free(first); - first = current; - } - return NULL; -} - - -struct cpufreq_available_frequencies * -sysfs_get_available_frequencies(unsigned int cpu) { - struct cpufreq_available_frequencies *first = NULL; - struct cpufreq_available_frequencies *current = NULL; - char one_value[SYSFS_PATH_MAX]; - char linebuf[MAX_LINE_LEN]; - unsigned int pos, i; - unsigned int len; - - len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies", - linebuf, sizeof(linebuf)); - if (len == 0) - return NULL; - - pos = 0; - for (i = 0; i < len; i++) { - if (linebuf[i] == ' ' || linebuf[i] == '\n') { - if (i - pos < 2) - continue; - if (i - pos >= SYSFS_PATH_MAX) - goto error_out; - if (current) { - current->next = malloc(sizeof(*current)); - if (!current->next) - goto error_out; - current = current->next; - } else { - first = malloc(sizeof(*first)); - if (!first) - goto error_out; - current = first; - } - current->first = first; - current->next = NULL; - - memcpy(one_value, linebuf + pos, i - pos); - one_value[i - pos] = '\0'; - if (sscanf(one_value, "%lu", ¤t->frequency) != 1) - goto error_out; - - pos = i + 1; - } - } - - return first; - - error_out: - while (first) { - current = first->next; - free(first); - first = current; - } - return NULL; -} - -static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu, - const char *file) -{ - struct cpufreq_affected_cpus *first = NULL; - struct cpufreq_affected_cpus *current = NULL; - char one_value[SYSFS_PATH_MAX]; - char linebuf[MAX_LINE_LEN]; - unsigned int pos, i; - unsigned int len; - - len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf)); - if (len == 0) - return NULL; - - pos = 0; - for (i = 0; i < len; i++) { - if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') { - if (i - pos < 1) - continue; - if (i - pos >= SYSFS_PATH_MAX) - goto error_out; - if (current) { - current->next = malloc(sizeof(*current)); - if (!current->next) - goto error_out; - current = current->next; - } else { - first = malloc(sizeof(*first)); - if (!first) - goto error_out; - current = first; - } - current->first = first; - current->next = NULL; - - memcpy(one_value, linebuf + pos, i - pos); - one_value[i - pos] = '\0'; - - if (sscanf(one_value, "%u", ¤t->cpu) != 1) - goto error_out; - - pos = i + 1; - } - } - - return first; - - error_out: - while (first) { - current = first->next; - free(first); - first = current; - } - return NULL; -} - -struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus(unsigned int cpu) -{ - return sysfs_get_cpu_list(cpu, "affected_cpus"); -} - -struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus(unsigned int cpu) -{ - return sysfs_get_cpu_list(cpu, "related_cpus"); -} - -struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu, - unsigned long long *total_time) { - struct cpufreq_stats *first = NULL; - struct cpufreq_stats *current = NULL; - char one_value[SYSFS_PATH_MAX]; - char linebuf[MAX_LINE_LEN]; - unsigned int pos, i; - unsigned int len; - - len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state", - linebuf, sizeof(linebuf)); - if (len == 0) - return NULL; - - *total_time = 0; - pos = 0; - for (i = 0; i < len; i++) { - if (i == strlen(linebuf) || linebuf[i] == '\n') { - if (i - pos < 2) - continue; - if ((i - pos) >= SYSFS_PATH_MAX) - goto error_out; - if (current) { - current->next = malloc(sizeof(*current)); - if (!current->next) - goto error_out; - current = current->next; - } else { - first = malloc(sizeof(*first)); - if (!first) - goto error_out; - current = first; - } - current->first = first; - current->next = NULL; - - memcpy(one_value, linebuf + pos, i - pos); - one_value[i - pos] = '\0'; - if (sscanf(one_value, "%lu %llu", - ¤t->frequency, - ¤t->time_in_state) != 2) - goto error_out; - - *total_time = *total_time + current->time_in_state; - pos = i + 1; - } - } - - return first; - - error_out: - while (first) { - current = first->next; - free(first); - first = current; - } - return NULL; -} - -unsigned long sysfs_get_freq_transitions(unsigned int cpu) -{ - return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS); -} - -static int verify_gov(char *new_gov, char *passed_gov) -{ - unsigned int i, j = 0; - - if (!passed_gov || (strlen(passed_gov) > 19)) - return -EINVAL; - - strncpy(new_gov, passed_gov, 20); - for (i = 0; i < 20; i++) { - if (j) { - new_gov[i] = '\0'; - continue; - } - if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z')) - continue; - - if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z')) - continue; - - if (new_gov[i] == '-') - continue; - - if (new_gov[i] == '_') - continue; - - if (new_gov[i] == '\0') { - j = 1; - continue; - } - return -EINVAL; - } - new_gov[19] = '\0'; - return 0; -} - -int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor) -{ - char new_gov[SYSFS_PATH_MAX]; - - if (!governor) - return -EINVAL; - - if (verify_gov(new_gov, governor)) - return -EINVAL; - - return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR, - new_gov, strlen(new_gov)); -}; - -int sysfs_modify_freq_policy_max(unsigned int cpu, unsigned long max_freq) -{ - char value[SYSFS_PATH_MAX]; - - snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq); - - return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ, - value, strlen(value)); -}; - - -int sysfs_modify_freq_policy_min(unsigned int cpu, unsigned long min_freq) -{ - char value[SYSFS_PATH_MAX]; - - snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq); - - return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, - value, strlen(value)); -}; - - -int sysfs_set_freq_policy(unsigned int cpu, struct cpufreq_policy *policy) -{ - char min[SYSFS_PATH_MAX]; - char max[SYSFS_PATH_MAX]; - char gov[SYSFS_PATH_MAX]; - int ret; - unsigned long old_min; - int write_max_first; - - if (!policy || !(policy->governor)) - return -EINVAL; - - if (policy->max < policy->min) - return -EINVAL; - - if (verify_gov(gov, policy->governor)) - return -EINVAL; - - snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min); - snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max); - - old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ); - write_max_first = (old_min && (policy->max < old_min) ? 0 : 1); - - if (write_max_first) { - ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ, - max, strlen(max)); - if (ret) - return ret; - } - - ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min, - strlen(min)); - if (ret) - return ret; - - if (!write_max_first) { - ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ, - max, strlen(max)); - if (ret) - return ret; - } - - return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR, - gov, strlen(gov)); -} - -int sysfs_set_frequency(unsigned int cpu, unsigned long target_frequency) -{ - struct cpufreq_policy *pol = sysfs_get_freq_policy(cpu); - char userspace_gov[] = "userspace"; - char freq[SYSFS_PATH_MAX]; - int ret; - - if (!pol) - return -ENODEV; - - if (strncmp(pol->governor, userspace_gov, 9) != 0) { - ret = sysfs_modify_freq_policy_governor(cpu, userspace_gov); - if (ret) { - cpufreq_put_policy(pol); - return ret; - } - } - - cpufreq_put_policy(pol); - - snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency); - - return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED, - freq, strlen(freq)); -} - -/* CPUFREQ sysfs access **************************************************/ - -/* General sysfs access **************************************************/ -int sysfs_cpu_exists(unsigned int cpu) -{ - char file[SYSFS_PATH_MAX]; - struct stat statbuf; - - snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/", cpu); - - if (stat(file, &statbuf) != 0) - return -ENOSYS; - - return S_ISDIR(statbuf.st_mode) ? 0 : -ENOSYS; -} - -/* General sysfs access **************************************************/ diff --git a/tools/power/cpupower/lib/sysfs.h b/tools/power/cpupower/lib/sysfs.h deleted file mode 100644 index c76a5e0af501..000000000000 --- a/tools/power/cpupower/lib/sysfs.h +++ /dev/null @@ -1,31 +0,0 @@ -/* General */ -extern unsigned int sysfs_cpu_exists(unsigned int cpu); - -/* CPUfreq */ -extern unsigned long sysfs_get_freq_kernel(unsigned int cpu); -extern unsigned long sysfs_get_freq_hardware(unsigned int cpu); -extern unsigned long sysfs_get_freq_transition_latency(unsigned int cpu); -extern int sysfs_get_freq_hardware_limits(unsigned int cpu, - unsigned long *min, unsigned long *max); -extern char *sysfs_get_freq_driver(unsigned int cpu); -extern struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu); -extern struct cpufreq_available_governors *sysfs_get_freq_available_governors( - unsigned int cpu); -extern struct cpufreq_available_frequencies *sysfs_get_available_frequencies( - unsigned int cpu); -extern struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus( - unsigned int cpu); -extern struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus( - unsigned int cpu); -extern struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu, - unsigned long long *total_time); -extern unsigned long sysfs_get_freq_transitions(unsigned int cpu); -extern int sysfs_set_freq_policy(unsigned int cpu, - struct cpufreq_policy *policy); -extern int sysfs_modify_freq_policy_min(unsigned int cpu, - unsigned long min_freq); -extern int sysfs_modify_freq_policy_max(unsigned int cpu, - unsigned long max_freq); -extern int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor); -extern int sysfs_set_frequency(unsigned int cpu, - unsigned long target_frequency); diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index 9c85a382e355..6aa8d239dff9 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -1,7 +1,7 @@ .TH "CPUPOWER\-FREQUENCY\-INFO" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP -cpupower frequency\-info \- Utility to retrieve cpufreq kernel information +cpupower\-frequency\-info \- Utility to retrieve cpufreq kernel information .SH "SYNTAX" .LP cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP] diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1 index 3eacc8d03d1a..b50570221a5b 100644 --- a/tools/power/cpupower/man/cpupower-frequency-set.1 +++ b/tools/power/cpupower/man/cpupower-frequency-set.1 @@ -1,7 +1,7 @@ .TH "CPUPOWER\-FREQUENCY\-SET" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP -cpupower frequency\-set \- A small tool which allows to modify cpufreq settings. +cpupower\-frequency\-set \- A small tool which allows to modify cpufreq settings. .SH "SYNTAX" .LP cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP] diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1 index 7b3646adb92f..80a1311fa747 100644 --- a/tools/power/cpupower/man/cpupower-idle-info.1 +++ b/tools/power/cpupower/man/cpupower-idle-info.1 @@ -1,7 +1,7 @@ .TH "CPUPOWER-IDLE-INFO" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP -cpupower idle\-info \- Utility to retrieve cpu idle kernel information +cpupower\-idle\-info \- Utility to retrieve cpu idle kernel information .SH "SYNTAX" .LP cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 580c4e3ea92a..21916cff7516 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -1,7 +1,7 @@ .TH "CPUPOWER-IDLE-SET" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP -cpupower idle\-set \- Utility to set cpu idle state specific kernel options +cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options .SH "SYNTAX" .LP cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 0fbd1a22c0a9..b4bf76971dc9 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -16,8 +16,8 @@ #include <getopt.h> #include "cpufreq.h" +#include "cpuidle.h" #include "helpers/helpers.h" -#include "helpers/sysfs.h" #define NORM_FREQ_LEN 32 @@ -296,7 +296,7 @@ int cmd_freq_set(int argc, char **argv) struct cpufreq_affected_cpus *cpus; if (!bitmask_isbitset(cpus_chosen, cpu) || - cpufreq_cpu_exists(cpu)) + cpupower_is_cpu_online(cpu)) continue; cpus = cpufreq_get_related_cpus(cpu); @@ -316,10 +316,10 @@ int cmd_freq_set(int argc, char **argv) cpu <= bitmask_last(cpus_chosen); cpu++) { if (!bitmask_isbitset(cpus_chosen, cpu) || - cpufreq_cpu_exists(cpu)) + cpupower_is_cpu_online(cpu)) continue; - if (sysfs_is_cpu_online(cpu) != 1) + if (cpupower_is_cpu_online(cpu) != 1) continue; printf(_("Setting cpu: %d\n"), cpu); diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 8bf8ab5ffa25..b59c85defa05 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -13,8 +13,10 @@ #include <string.h> #include <getopt.h> -#include "helpers/helpers.h" +#include <cpuidle.h> + #include "helpers/sysfs.h" +#include "helpers/helpers.h" #include "helpers/bitmask.h" #define LINE_LEN 10 @@ -24,7 +26,7 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) unsigned int idlestates, idlestate; char *tmp; - idlestates = sysfs_get_idlestate_count(cpu); + idlestates = cpuidle_state_count(cpu); if (idlestates == 0) { printf(_("CPU %u: No idle states\n"), cpu); return; @@ -33,7 +35,7 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) printf(_("Number of idle states: %d\n"), idlestates); printf(_("Available idle states:")); for (idlestate = 0; idlestate < idlestates; idlestate++) { - tmp = sysfs_get_idlestate_name(cpu, idlestate); + tmp = cpuidle_state_name(cpu, idlestate); if (!tmp) continue; printf(" %s", tmp); @@ -45,28 +47,28 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) return; for (idlestate = 0; idlestate < idlestates; idlestate++) { - int disabled = sysfs_is_idlestate_disabled(cpu, idlestate); + int disabled = cpuidle_is_state_disabled(cpu, idlestate); /* Disabled interface not supported on older kernels */ if (disabled < 0) disabled = 0; - tmp = sysfs_get_idlestate_name(cpu, idlestate); + tmp = cpuidle_state_name(cpu, idlestate); if (!tmp) continue; printf("%s%s:\n", tmp, (disabled) ? " (DISABLED) " : ""); free(tmp); - tmp = sysfs_get_idlestate_desc(cpu, idlestate); + tmp = cpuidle_state_desc(cpu, idlestate); if (!tmp) continue; printf(_("Flags/Description: %s\n"), tmp); free(tmp); printf(_("Latency: %lu\n"), - sysfs_get_idlestate_latency(cpu, idlestate)); + cpuidle_state_latency(cpu, idlestate)); printf(_("Usage: %lu\n"), - sysfs_get_idlestate_usage(cpu, idlestate)); + cpuidle_state_usage(cpu, idlestate)); printf(_("Duration: %llu\n"), - sysfs_get_idlestate_time(cpu, idlestate)); + cpuidle_state_time(cpu, idlestate)); } } @@ -74,7 +76,7 @@ static void cpuidle_general_output(void) { char *tmp; - tmp = sysfs_get_cpuidle_driver(); + tmp = cpuidle_get_driver(); if (!tmp) { printf(_("Could not determine cpuidle driver\n")); return; @@ -83,7 +85,7 @@ static void cpuidle_general_output(void) printf(_("CPUidle driver: %s\n"), tmp); free(tmp); - tmp = sysfs_get_cpuidle_governor(); + tmp = cpuidle_get_governor(); if (!tmp) { printf(_("Could not determine cpuidle governor\n")); return; @@ -98,7 +100,7 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) long max_allowed_cstate = 2000000000; unsigned int cstate, cstates; - cstates = sysfs_get_idlestate_count(cpu); + cstates = cpuidle_state_count(cpu); if (cstates == 0) { printf(_("CPU %u: No C-states info\n"), cpu); return; @@ -113,11 +115,11 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) "type[C%d] "), cstate, cstate); printf(_("promotion[--] demotion[--] ")); printf(_("latency[%03lu] "), - sysfs_get_idlestate_latency(cpu, cstate)); + cpuidle_state_latency(cpu, cstate)); printf(_("usage[%08lu] "), - sysfs_get_idlestate_usage(cpu, cstate)); + cpuidle_state_usage(cpu, cstate)); printf(_("duration[%020Lu] \n"), - sysfs_get_idlestate_time(cpu, cstate)); + cpuidle_state_time(cpu, cstate)); } } diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index d6b6ae44b8c2..691c24d50ef4 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -5,12 +5,12 @@ #include <limits.h> #include <string.h> #include <ctype.h> - #include <getopt.h> -#include "cpufreq.h" +#include <cpufreq.h> +#include <cpuidle.h> + #include "helpers/helpers.h" -#include "helpers/sysfs.h" static struct option info_opts[] = { {"disable", required_argument, NULL, 'd'}, @@ -104,16 +104,16 @@ int cmd_idle_set(int argc, char **argv) if (!bitmask_isbitset(cpus_chosen, cpu)) continue; - if (sysfs_is_cpu_online(cpu) != 1) + if (cpupower_is_cpu_online(cpu) != 1) continue; - idlestates = sysfs_get_idlestate_count(cpu); + idlestates = cpuidle_state_count(cpu); if (idlestates <= 0) continue; switch (param) { case 'd': - ret = sysfs_idlestate_disable(cpu, idlestate, 1); + ret = cpuidle_state_disable(cpu, idlestate, 1); if (ret == 0) printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); else if (ret == -1) @@ -126,7 +126,7 @@ int cmd_idle_set(int argc, char **argv) idlestate, cpu); break; case 'e': - ret = sysfs_idlestate_disable(cpu, idlestate, 0); + ret = cpuidle_state_disable(cpu, idlestate, 0); if (ret == 0) printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); else if (ret == -1) @@ -140,13 +140,13 @@ int cmd_idle_set(int argc, char **argv) break; case 'D': for (idlestate = 0; idlestate < idlestates; idlestate++) { - disabled = sysfs_is_idlestate_disabled + disabled = cpuidle_is_state_disabled (cpu, idlestate); - state_latency = sysfs_get_idlestate_latency + state_latency = cpuidle_state_latency (cpu, idlestate); if (disabled == 1) { if (latency > state_latency){ - ret = sysfs_idlestate_disable + ret = cpuidle_state_disable (cpu, idlestate, 0); if (ret == 0) printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); @@ -154,7 +154,7 @@ int cmd_idle_set(int argc, char **argv) continue; } if (latency <= state_latency){ - ret = sysfs_idlestate_disable + ret = cpuidle_state_disable (cpu, idlestate, 1); if (ret == 0) printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); @@ -163,10 +163,10 @@ int cmd_idle_set(int argc, char **argv) break; case 'E': for (idlestate = 0; idlestate < idlestates; idlestate++) { - disabled = sysfs_is_idlestate_disabled + disabled = cpuidle_is_state_disabled (cpu, idlestate); if (disabled == 1) { - ret = sysfs_idlestate_disable + ret = cpuidle_state_disable (cpu, idlestate, 0); if (ret == 0) printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index aa9e95486a2d..afb66f80554e 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -14,6 +14,7 @@ #include <locale.h> #include "helpers/bitmask.h" +#include <cpupower.h> /* Internationalization ****************************/ #ifdef NLS @@ -92,31 +93,6 @@ extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info); extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ -struct cpuid_core_info { - int pkg; - int core; - int cpu; - - /* flags */ - unsigned int is_online:1; -}; - -/* CPU topology/hierarchy parsing ******************/ -struct cpupower_topology { - /* Amount of CPU cores, packages and threads per core in the system */ - unsigned int cores; - unsigned int pkgs; - unsigned int threads; /* per core */ - - /* Array gets mallocated with cores entries, holding per core info */ - struct cpuid_core_info *core_info; -}; - -extern int get_cpu_topology(struct cpupower_topology *cpu_top); -extern void cpu_topology_release(struct cpupower_topology cpu_top); - -/* CPU topology/hierarchy parsing ******************/ - /* X86 ONLY ****************************************/ #if defined(__i386__) || defined(__x86_64__) diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index 5f9c908f4557..a1a6c6041a1e 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -16,110 +16,7 @@ #include <errno.h> #include <fcntl.h> -#include <helpers/helpers.h> -#include <helpers/sysfs.h> +#include <cpuidle.h> -/* returns -1 on failure, 0 on success */ -static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result) -{ - char linebuf[MAX_LINE_LEN]; - char *endp; - char path[SYSFS_PATH_MAX]; +/* CPU topology/hierarchy parsing ******************/ - snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", - cpu, fname); - if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) - return -1; - *result = strtol(linebuf, &endp, 0); - if (endp == linebuf || errno == ERANGE) - return -1; - return 0; -} - -static int __compare(const void *t1, const void *t2) -{ - struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1; - struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2; - if (top1->pkg < top2->pkg) - return -1; - else if (top1->pkg > top2->pkg) - return 1; - else if (top1->core < top2->core) - return -1; - else if (top1->core > top2->core) - return 1; - else if (top1->cpu < top2->cpu) - return -1; - else if (top1->cpu > top2->cpu) - return 1; - else - return 0; -} - -/* - * Returns amount of cpus, negative on error, cpu_top must be - * passed to cpu_topology_release to free resources - * - * Array is sorted after ->pkg, ->core, then ->cpu - */ -int get_cpu_topology(struct cpupower_topology *cpu_top) -{ - int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF); - - cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus); - if (cpu_top->core_info == NULL) - return -ENOMEM; - cpu_top->pkgs = cpu_top->cores = 0; - for (cpu = 0; cpu < cpus; cpu++) { - cpu_top->core_info[cpu].cpu = cpu; - cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); - if(sysfs_topology_read_file( - cpu, - "physical_package_id", - &(cpu_top->core_info[cpu].pkg)) < 0) { - cpu_top->core_info[cpu].pkg = -1; - cpu_top->core_info[cpu].core = -1; - continue; - } - if(sysfs_topology_read_file( - cpu, - "core_id", - &(cpu_top->core_info[cpu].core)) < 0) { - cpu_top->core_info[cpu].pkg = -1; - cpu_top->core_info[cpu].core = -1; - continue; - } - } - - qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), - __compare); - - /* Count the number of distinct pkgs values. This works - because the primary sort of the core_info struct was just - done by pkg value. */ - last_pkg = cpu_top->core_info[0].pkg; - for(cpu = 1; cpu < cpus; cpu++) { - if (cpu_top->core_info[cpu].pkg != last_pkg && - cpu_top->core_info[cpu].pkg != -1) { - - last_pkg = cpu_top->core_info[cpu].pkg; - cpu_top->pkgs++; - } - } - if (!(cpu_top->core_info[0].pkg == -1)) - cpu_top->pkgs++; - - /* Intel's cores count is not consecutively numbered, there may - * be a core_id of 3, but none of 2. Assume there always is 0 - * Get amount of cores by counting duplicates in a package - for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) { - if (cpu_top->core_info[cpu].core == 0) - cpu_top->cores++; - */ - return cpus; -} - -void cpu_topology_release(struct cpupower_topology cpu_top) -{ - free(cpu_top.core_info); -} diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index bcd22a1a3970..1b5da0066ebf 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -10,8 +10,8 @@ #include <stdint.h> #include <string.h> #include <limits.h> +#include <cpuidle.h> -#include "helpers/sysfs.h" #include "helpers/helpers.h" #include "idle_monitor/cpupower-monitor.h" @@ -51,7 +51,7 @@ static int cpuidle_start(void) for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num; state++) { previous_count[cpu][state] = - sysfs_get_idlestate_time(cpu, state); + cpuidle_state_time(cpu, state); dprint("CPU %d - State: %d - Val: %llu\n", cpu, state, previous_count[cpu][state]); } @@ -70,7 +70,7 @@ static int cpuidle_stop(void) for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num; state++) { current_count[cpu][state] = - sysfs_get_idlestate_time(cpu, state); + cpuidle_state_time(cpu, state); dprint("CPU %d - State: %d - Val: %llu\n", cpu, state, previous_count[cpu][state]); } @@ -132,13 +132,13 @@ static struct cpuidle_monitor *cpuidle_register(void) char *tmp; /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = sysfs_get_idlestate_name(0, num); + tmp = cpuidle_state_name(0, num); if (tmp == NULL) continue; @@ -146,7 +146,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = sysfs_get_idlestate_desc(0, num); + tmp = cpuidle_state_desc(0, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 20a257a12ea5..acbf7ff2ee6e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -66,6 +66,8 @@ unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; +unsigned int do_irtl_snb; +unsigned int do_irtl_hsw; unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; @@ -187,7 +189,7 @@ struct pkg_data { unsigned long long pkg_any_core_c0; unsigned long long pkg_any_gfxe_c0; unsigned long long pkg_both_core_gfxe_c0; - unsigned long long gfx_rc6_ms; + long long gfx_rc6_ms; unsigned int gfx_mhz; unsigned int package_id; unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ @@ -621,8 +623,14 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%8d", p->pkg_temp_c); /* GFXrc6 */ - if (do_gfx_rc6_ms) - outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float); + if (do_gfx_rc6_ms) { + if (p->gfx_rc6_ms == -1) { /* detect counter reset */ + outp += sprintf(outp, " ***.**"); + } else { + outp += sprintf(outp, "%8.2f", + p->gfx_rc6_ms / 10.0 / interval_float); + } + } /* GFXMHz */ if (do_gfx_mhz) @@ -766,7 +774,12 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc10 = new->pc10 - old->pc10; old->pkg_temp_c = new->pkg_temp_c; - old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; + /* flag an error when rc6 counter resets/wraps */ + if (old->gfx_rc6_ms > new->gfx_rc6_ms) + old->gfx_rc6_ms = -1; + else + old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; + old->gfx_mhz = new->gfx_mhz; DELTA_WRAP32(new->energy_pkg, old->energy_pkg); @@ -1296,6 +1309,7 @@ int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; static void @@ -1579,6 +1593,47 @@ dump_config_tdp(void) fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(outf, ")\n"); } + +unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; + +void print_irtl(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + if (!do_irtl_hsw) + return; + + get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + +} void free_fd_percpu(void) { int i; @@ -2144,6 +2199,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case 0x56: /* BDX-DE */ case 0x4E: /* SKL */ case 0x5E: /* SKL */ + case 0x8E: /* KBL */ + case 0x9E: /* KBL */ + case 0x55: /* SKX */ pkg_cstate_limits = hsw_pkg_cstate_limits; break; case 0x37: /* BYT */ @@ -2156,6 +2214,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case 0x57: /* PHI */ pkg_cstate_limits = phi_pkg_cstate_limits; break; + case 0x5C: /* BXT */ + pkg_cstate_limits = bxt_pkg_cstate_limits; + break; default: return 0; } @@ -2248,6 +2309,9 @@ int has_config_tdp(unsigned int family, unsigned int model) case 0x56: /* BDX-DE */ case 0x4E: /* SKL */ case 0x5E: /* SKL */ + case 0x8E: /* KBL */ + case 0x9E: /* KBL */ + case 0x55: /* SKX */ case 0x57: /* Knights Landing */ return 1; @@ -2585,13 +2649,19 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x47: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; break; + case 0x5C: /* BXT */ + do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; + break; case 0x4E: /* SKL */ case 0x5E: /* SKL */ + case 0x8E: /* KBL */ + case 0x9E: /* KBL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x55: /* SKX */ case 0x57: /* KNL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; @@ -2871,6 +2941,10 @@ int has_snb_msrs(unsigned int family, unsigned int model) case 0x56: /* BDX-DE */ case 0x4E: /* SKL */ case 0x5E: /* SKL */ + case 0x8E: /* KBL */ + case 0x9E: /* KBL */ + case 0x55: /* SKX */ + case 0x5C: /* BXT */ return 1; } return 0; @@ -2879,9 +2953,14 @@ int has_snb_msrs(unsigned int family, unsigned int model) /* * HSW adds support for additional MSRs: * - * MSR_PKG_C8_RESIDENCY 0x00000630 - * MSR_PKG_C9_RESIDENCY 0x00000631 - * MSR_PKG_C10_RESIDENCY 0x00000632 + * MSR_PKG_C8_RESIDENCY 0x00000630 + * MSR_PKG_C9_RESIDENCY 0x00000631 + * MSR_PKG_C10_RESIDENCY 0x00000632 + * + * MSR_PKGC8_IRTL 0x00000633 + * MSR_PKGC9_IRTL 0x00000634 + * MSR_PKGC10_IRTL 0x00000635 + * */ int has_hsw_msrs(unsigned int family, unsigned int model) { @@ -2893,6 +2972,9 @@ int has_hsw_msrs(unsigned int family, unsigned int model) case 0x3D: /* BDW */ case 0x4E: /* SKL */ case 0x5E: /* SKL */ + case 0x8E: /* KBL */ + case 0x9E: /* KBL */ + case 0x5C: /* BXT */ return 1; } return 0; @@ -2914,6 +2996,8 @@ int has_skl_msrs(unsigned int family, unsigned int model) switch (model) { case 0x4E: /* SKL */ case 0x5E: /* SKL */ + case 0x8E: /* KBL */ + case 0x9E: /* KBL */ return 1; } return 0; @@ -3187,7 +3271,7 @@ void process_cpuid() if (debug) decode_misc_enable_msr(); - if (max_level >= 0x7) { + if (max_level >= 0x7 && debug) { int has_sgx; ecx = 0; @@ -3221,7 +3305,15 @@ void process_cpuid() switch(model) { case 0x4E: /* SKL */ case 0x5E: /* SKL */ - crystal_hz = 24000000; /* 24 MHz */ + case 0x8E: /* KBL */ + case 0x9E: /* KBL */ + crystal_hz = 24000000; /* 24.0 MHz */ + break; + case 0x55: /* SKX */ + crystal_hz = 25000000; /* 25.0 MHz */ + break; + case 0x5C: /* BXT */ + crystal_hz = 19200000; /* 19.2 MHz */ break; default: crystal_hz = 0; @@ -3254,11 +3346,13 @@ void process_cpuid() do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); do_snb_cstates = has_snb_msrs(family, model); + do_irtl_snb = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); do_pc3 = (pkg_cstate_limit >= PCL__3); do_pc6 = (pkg_cstate_limit >= PCL__6); do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); do_c8_c9_c10 = has_hsw_msrs(family, model); + do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); @@ -3564,6 +3658,9 @@ void turbostat_init() if (debug) for_all_cpus(print_thermal, ODD_COUNTERS); + + if (debug && do_irtl_snb) + print_irtl(); } int fork_it(char **argv) @@ -3629,7 +3726,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.11 27 Feb 2016" + fprintf(outf, "turbostat version 4.12 5 Apr 2016" " - Len Brown <lenb@kernel.org>\n"); } diff --git a/tools/perf/config/utilities.mak b/tools/scripts/utilities.mak index c16ce833079c..c16ce833079c 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/scripts/utilities.mak diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore new file mode 100644 index 000000000000..11d888ca6a92 --- /dev/null +++ b/tools/testing/radix-tree/.gitignore @@ -0,0 +1,2 @@ +main +radix-tree.c diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile new file mode 100644 index 000000000000..604212db9d4b --- /dev/null +++ b/tools/testing/radix-tree/Makefile @@ -0,0 +1,19 @@ + +CFLAGS += -I. -g -Wall -D_LGPL_SOURCE +LDFLAGS += -lpthread -lurcu +TARGETS = main +OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ + regression1.o regression2.o regression3.o + +targets: $(TARGETS) + +main: $(OFILES) + $(CC) $(CFLAGS) $(LDFLAGS) $(OFILES) -o main + +clean: + $(RM) -f $(TARGETS) *.o radix-tree.c + +$(OFILES): *.h */*.h + +radix-tree.c: ../../../lib/radix-tree.c + sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ diff --git a/tools/testing/radix-tree/find_next_bit.c b/tools/testing/radix-tree/find_next_bit.c new file mode 100644 index 000000000000..d1c2178bb2d4 --- /dev/null +++ b/tools/testing/radix-tree/find_next_bit.c @@ -0,0 +1,57 @@ +/* find_next_bit.c: fallback find next bit implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/bitops.h> + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/* + * Find the next set bit in a memory region. + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c new file mode 100644 index 000000000000..154823737b20 --- /dev/null +++ b/tools/testing/radix-tree/linux.c @@ -0,0 +1,60 @@ +#include <stdlib.h> +#include <string.h> +#include <malloc.h> +#include <unistd.h> +#include <assert.h> + +#include <linux/mempool.h> +#include <linux/slab.h> +#include <urcu/uatomic.h> + +int nr_allocated; + +void *mempool_alloc(mempool_t *pool, int gfp_mask) +{ + return pool->alloc(gfp_mask, pool->data); +} + +void mempool_free(void *element, mempool_t *pool) +{ + pool->free(element, pool->data); +} + +mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data) +{ + mempool_t *ret = malloc(sizeof(*ret)); + + ret->alloc = alloc_fn; + ret->free = free_fn; + ret->data = pool_data; + return ret; +} + +void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) +{ + void *ret = malloc(cachep->size); + if (cachep->ctor) + cachep->ctor(ret); + uatomic_inc(&nr_allocated); + return ret; +} + +void kmem_cache_free(struct kmem_cache *cachep, void *objp) +{ + assert(objp); + uatomic_dec(&nr_allocated); + memset(objp, 0, cachep->size); + free(objp); +} + +struct kmem_cache * +kmem_cache_create(const char *name, size_t size, size_t offset, + unsigned long flags, void (*ctor)(void *)) +{ + struct kmem_cache *ret = malloc(sizeof(*ret)); + + ret->size = size; + ret->ctor = ctor; + return ret; +} diff --git a/tools/testing/radix-tree/linux/bitops.h b/tools/testing/radix-tree/linux/bitops.h new file mode 100644 index 000000000000..71d58427ab60 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops.h @@ -0,0 +1,150 @@ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#include <linux/types.h> + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/__ffs.h b/tools/testing/radix-tree/linux/bitops/__ffs.h new file mode 100644 index 000000000000..9a3274aecf83 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/__ffs.h @@ -0,0 +1,43 @@ +#ifndef _ASM_GENERIC_BITOPS___FFS_H_ +#define _ASM_GENERIC_BITOPS___FFS_H_ + +#include <asm/types.h> + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +#endif /* _ASM_GENERIC_BITOPS___FFS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/ffs.h b/tools/testing/radix-tree/linux/bitops/ffs.h new file mode 100644 index 000000000000..fbbb43af7dc0 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/ffs.h @@ -0,0 +1,41 @@ +#ifndef _ASM_GENERIC_BITOPS_FFS_H_ +#define _ASM_GENERIC_BITOPS_FFS_H_ + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + */ +static inline int ffs(int x) +{ + int r = 1; + + if (!x) + return 0; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + +#endif /* _ASM_GENERIC_BITOPS_FFS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/ffz.h b/tools/testing/radix-tree/linux/bitops/ffz.h new file mode 100644 index 000000000000..6744bd4cdf46 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/ffz.h @@ -0,0 +1,12 @@ +#ifndef _ASM_GENERIC_BITOPS_FFZ_H_ +#define _ASM_GENERIC_BITOPS_FFZ_H_ + +/* + * ffz - find first zero in word. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +#define ffz(x) __ffs(~(x)) + +#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/find.h b/tools/testing/radix-tree/linux/bitops/find.h new file mode 100644 index 000000000000..72a51e5a12ef --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/find.h @@ -0,0 +1,13 @@ +#ifndef _ASM_GENERIC_BITOPS_FIND_H_ +#define _ASM_GENERIC_BITOPS_FIND_H_ + +extern unsigned long find_next_bit(const unsigned long *addr, unsigned long + size, unsigned long offset); + +extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned + long size, unsigned long offset); + +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) + +#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/fls.h b/tools/testing/radix-tree/linux/bitops/fls.h new file mode 100644 index 000000000000..850859bc5069 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/fls.h @@ -0,0 +1,41 @@ +#ifndef _ASM_GENERIC_BITOPS_FLS_H_ +#define _ASM_GENERIC_BITOPS_FLS_H_ + +/** + * fls - find last (most-significant) bit set + * @x: the word to search + * + * This is defined the same way as ffs. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. + */ + +static inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/fls64.h b/tools/testing/radix-tree/linux/bitops/fls64.h new file mode 100644 index 000000000000..1b6b17ce2428 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/fls64.h @@ -0,0 +1,14 @@ +#ifndef _ASM_GENERIC_BITOPS_FLS64_H_ +#define _ASM_GENERIC_BITOPS_FLS64_H_ + +#include <asm/types.h> + +static inline int fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +} + +#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/hweight.h b/tools/testing/radix-tree/linux/bitops/hweight.h new file mode 100644 index 000000000000..fbbc383771da --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/hweight.h @@ -0,0 +1,11 @@ +#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_HWEIGHT_H_ + +#include <asm/types.h> + +extern unsigned int hweight32(unsigned int w); +extern unsigned int hweight16(unsigned int w); +extern unsigned int hweight8(unsigned int w); +extern unsigned long hweight64(__u64 w); + +#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/le.h b/tools/testing/radix-tree/linux/bitops/le.h new file mode 100644 index 000000000000..b9c7e5d2d2ad --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/le.h @@ -0,0 +1,53 @@ +#ifndef _ASM_GENERIC_BITOPS_LE_H_ +#define _ASM_GENERIC_BITOPS_LE_H_ + +#include <asm/types.h> +#include <asm/byteorder.h> + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) + +#if defined(__LITTLE_ENDIAN) + +#define generic_test_le_bit(nr, addr) test_bit(nr, addr) +#define generic___set_le_bit(nr, addr) __set_bit(nr, addr) +#define generic___clear_le_bit(nr, addr) __clear_bit(nr, addr) + +#define generic_test_and_set_le_bit(nr, addr) test_and_set_bit(nr, addr) +#define generic_test_and_clear_le_bit(nr, addr) test_and_clear_bit(nr, addr) + +#define generic___test_and_set_le_bit(nr, addr) __test_and_set_bit(nr, addr) +#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr) + +#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset) + +#elif defined(__BIG_ENDIAN) + +#define generic_test_le_bit(nr, addr) \ + test_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic___set_le_bit(nr, addr) \ + __set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic___clear_le_bit(nr, addr) \ + __clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +#define generic_test_and_set_le_bit(nr, addr) \ + test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic_test_and_clear_le_bit(nr, addr) \ + test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +#define generic___test_and_set_le_bit(nr, addr) \ + __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) +#define generic___test_and_clear_le_bit(nr, addr) \ + __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) + +extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); + +#else +#error "Please fix <asm/byteorder.h>" +#endif + +#define generic_find_first_zero_le_bit(addr, size) \ + generic_find_next_zero_le_bit((addr), (size), 0) + +#endif /* _ASM_GENERIC_BITOPS_LE_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/non-atomic.h b/tools/testing/radix-tree/linux/bitops/non-atomic.h new file mode 100644 index 000000000000..46a825cf2ae1 --- /dev/null +++ b/tools/testing/radix-tree/linux/bitops/non-atomic.h @@ -0,0 +1,111 @@ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#include <asm/types.h> + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h new file mode 100644 index 000000000000..ccbe444977df --- /dev/null +++ b/tools/testing/radix-tree/linux/bug.h @@ -0,0 +1 @@ +#define WARN_ON_ONCE(x) assert(x) diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h new file mode 100644 index 000000000000..60a40459f269 --- /dev/null +++ b/tools/testing/radix-tree/linux/cpu.h @@ -0,0 +1,34 @@ + +#define hotcpu_notifier(a, b) + +#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ +#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ +#define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */ +#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ +#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ +#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ +#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, + * not handling interrupts, soon dead. + * Called on the dying cpu, interrupts + * are already disabled. Must not + * sleep, must not fail */ +#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug + * lock is dropped */ +#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. + * Called on the new cpu, just before + * enabling interrupts. Must not sleep, + * must not fail */ +#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached + * idle loop. */ +#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, + * perhaps due to preemption. */ +#define CPU_TASKS_FROZEN 0x0010 + +#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) +#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) +#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) +#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) +#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) +#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) +#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) diff --git a/tools/testing/radix-tree/linux/export.h b/tools/testing/radix-tree/linux/export.h new file mode 100644 index 000000000000..b6afd131998d --- /dev/null +++ b/tools/testing/radix-tree/linux/export.h @@ -0,0 +1,2 @@ + +#define EXPORT_SYMBOL(sym) diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h new file mode 100644 index 000000000000..0e37f7a760eb --- /dev/null +++ b/tools/testing/radix-tree/linux/gfp.h @@ -0,0 +1,10 @@ +#ifndef _GFP_H +#define _GFP_H + +#define __GFP_BITS_SHIFT 22 +#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) +#define __GFP_WAIT 1 +#define __GFP_ACCOUNT 0 +#define __GFP_NOWARN 0 + +#endif diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h new file mode 100644 index 000000000000..ae013b0160ac --- /dev/null +++ b/tools/testing/radix-tree/linux/kernel.h @@ -0,0 +1,35 @@ +#ifndef _KERNEL_H +#define _KERNEL_H + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stddef.h> +#include <limits.h> + +#ifndef NULL +#define NULL 0 +#endif + +#define BUG_ON(expr) assert(!(expr)) +#define __init +#define __must_check +#define panic(expr) +#define printk printf +#define __force +#define likely(c) (c) +#define unlikely(c) (c) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type, member) );}) +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static inline int in_interrupt(void) +{ + return 0; +} +#endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/kmemleak.h b/tools/testing/radix-tree/linux/kmemleak.h new file mode 100644 index 000000000000..155f112786c4 --- /dev/null +++ b/tools/testing/radix-tree/linux/kmemleak.h @@ -0,0 +1 @@ +static inline void kmemleak_update_trace(const void *ptr) { } diff --git a/tools/testing/radix-tree/linux/mempool.h b/tools/testing/radix-tree/linux/mempool.h new file mode 100644 index 000000000000..6a2dc55b41d6 --- /dev/null +++ b/tools/testing/radix-tree/linux/mempool.h @@ -0,0 +1,16 @@ + +#include <linux/slab.h> + +typedef void *(mempool_alloc_t)(int gfp_mask, void *pool_data); +typedef void (mempool_free_t)(void *element, void *pool_data); + +typedef struct { + mempool_alloc_t *alloc; + mempool_free_t *free; + void *data; +} mempool_t; + +void *mempool_alloc(mempool_t *pool, int gfp_mask); +void mempool_free(void *element, mempool_t *pool); +mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data); diff --git a/tools/testing/radix-tree/linux/notifier.h b/tools/testing/radix-tree/linux/notifier.h new file mode 100644 index 000000000000..70e4797d5a46 --- /dev/null +++ b/tools/testing/radix-tree/linux/notifier.h @@ -0,0 +1,8 @@ +#ifndef _NOTIFIER_H +#define _NOTIFIER_H + +struct notifier_block; + +#define NOTIFY_OK 0x0001 /* Suits me */ + +#endif diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h new file mode 100644 index 000000000000..5837f1d56f17 --- /dev/null +++ b/tools/testing/radix-tree/linux/percpu.h @@ -0,0 +1,7 @@ + +#define DEFINE_PER_CPU(type, val) type val + +#define __get_cpu_var(var) var +#define this_cpu_ptr(var) var +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu)) diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h new file mode 100644 index 000000000000..6210672e3baa --- /dev/null +++ b/tools/testing/radix-tree/linux/preempt.h @@ -0,0 +1,4 @@ +/* */ + +#define preempt_disable() do { } while (0) +#define preempt_enable() do { } while (0) diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h new file mode 100644 index 000000000000..ce694ddd4aea --- /dev/null +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -0,0 +1 @@ +#include "../../../../include/linux/radix-tree.h" diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h new file mode 100644 index 000000000000..f7129ea2a899 --- /dev/null +++ b/tools/testing/radix-tree/linux/rcupdate.h @@ -0,0 +1,9 @@ +#ifndef _RCUPDATE_H +#define _RCUPDATE_H + +#include <urcu.h> + +#define rcu_dereference_raw(p) rcu_dereference(p) +#define rcu_dereference_protected(p, cond) rcu_dereference(p) + +#endif diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h new file mode 100644 index 000000000000..57282506c21d --- /dev/null +++ b/tools/testing/radix-tree/linux/slab.h @@ -0,0 +1,28 @@ +#ifndef SLAB_H +#define SLAB_H + +#include <linux/types.h> + +#define GFP_KERNEL 1 +#define SLAB_HWCACHE_ALIGN 1 +#define SLAB_PANIC 2 +#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ + +static inline int gfpflags_allow_blocking(gfp_t mask) +{ + return 1; +} + +struct kmem_cache { + int size; + void (*ctor)(void *); +}; + +void *kmem_cache_alloc(struct kmem_cache *cachep, int flags); +void kmem_cache_free(struct kmem_cache *cachep, void *objp); + +struct kmem_cache * +kmem_cache_create(const char *name, size_t size, size_t offset, + unsigned long flags, void (*ctor)(void *)); + +#endif /* SLAB_H */ diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h new file mode 100644 index 000000000000..72a9d85f6c76 --- /dev/null +++ b/tools/testing/radix-tree/linux/types.h @@ -0,0 +1,28 @@ +#ifndef _TYPES_H +#define _TYPES_H + +#define __rcu +#define __read_mostly + +#define BITS_PER_LONG (sizeof(long) * 8) + +struct list_head { + struct list_head *next, *prev; +}; + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +typedef struct { + unsigned int x; +} spinlock_t; + +#define uninitialized_var(x) x = x + +typedef unsigned gfp_t; +#include <linux/gfp.h> + +#endif diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c new file mode 100644 index 000000000000..0e83cad27a9f --- /dev/null +++ b/tools/testing/radix-tree/main.c @@ -0,0 +1,272 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <assert.h> + +#include <linux/slab.h> +#include <linux/radix-tree.h> + +#include "test.h" +#include "regression.h" + +void __gang_check(unsigned long middle, long down, long up, int chunk, int hop) +{ + long idx; + RADIX_TREE(tree, GFP_KERNEL); + + middle = 1 << 30; + + for (idx = -down; idx < up; idx++) + item_insert(&tree, middle + idx); + + item_check_absent(&tree, middle - down - 1); + for (idx = -down; idx < up; idx++) + item_check_present(&tree, middle + idx); + item_check_absent(&tree, middle + up); + + item_gang_check_present(&tree, middle - down, + up + down, chunk, hop); + item_full_scan(&tree, middle - down, down + up, chunk); + item_kill_tree(&tree); +} + +void gang_check(void) +{ + __gang_check(1 << 30, 128, 128, 35, 2); + __gang_check(1 << 31, 128, 128, 32, 32); + __gang_check(1 << 31, 128, 128, 32, 100); + __gang_check(1 << 31, 128, 128, 17, 7); + __gang_check(0xffff0000, 0, 65536, 17, 7); + __gang_check(0xfffffffe, 1, 1, 17, 7); +} + +void __big_gang_check(void) +{ + unsigned long start; + int wrapped = 0; + + start = 0; + do { + unsigned long old_start; + +// printf("0x%08lx\n", start); + __gang_check(start, rand() % 113 + 1, rand() % 71, + rand() % 157, rand() % 91 + 1); + old_start = start; + start += rand() % 1000000; + start %= 1ULL << 33; + if (start < old_start) + wrapped = 1; + } while (!wrapped); +} + +void big_gang_check(void) +{ + int i; + + for (i = 0; i < 1000; i++) { + __big_gang_check(); + srand(time(0)); + printf("%d ", i); + fflush(stdout); + } +} + +void add_and_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 44); + item_check_present(&tree, 44); + item_check_absent(&tree, 43); + item_kill_tree(&tree); +} + +void dynamic_height_check(void) +{ + int i; + RADIX_TREE(tree, GFP_KERNEL); + tree_verify_min_height(&tree, 0); + + item_insert(&tree, 42); + tree_verify_min_height(&tree, 42); + + item_insert(&tree, 1000000); + tree_verify_min_height(&tree, 1000000); + + assert(item_delete(&tree, 1000000)); + tree_verify_min_height(&tree, 42); + + assert(item_delete(&tree, 42)); + tree_verify_min_height(&tree, 0); + + for (i = 0; i < 1000; i++) { + item_insert(&tree, i); + tree_verify_min_height(&tree, i); + } + + i--; + for (;;) { + assert(item_delete(&tree, i)); + if (i == 0) { + tree_verify_min_height(&tree, 0); + break; + } + i--; + tree_verify_min_height(&tree, i); + } + + item_kill_tree(&tree); +} + +void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsigned long end, unsigned long *idx, int count, int fromtag, int totag) +{ + int i; + + for (i = 0; i < count; i++) { +/* if (i % 1000 == 0) + putchar('.'); */ + if (idx[i] < start || idx[i] > end) { + if (item_tag_get(tree, idx[i], totag)) { + printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag)); + } + assert(!item_tag_get(tree, idx[i], totag)); + continue; + } + if (item_tag_get(tree, idx[i], fromtag) ^ + item_tag_get(tree, idx[i], totag)) { + printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag)); + } + assert(!(item_tag_get(tree, idx[i], fromtag) ^ + item_tag_get(tree, idx[i], totag))); + } +} + +#define ITEMS 50000 + +void copy_tag_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + unsigned long idx[ITEMS]; + unsigned long start, end, count = 0, tagged, cur, tmp; + int i; + +// printf("generating radix tree indices...\n"); + start = rand(); + end = rand(); + if (start > end && (rand() % 10)) { + cur = start; + start = end; + end = cur; + } + /* Specifically create items around the start and the end of the range + * with high probability to check for off by one errors */ + cur = rand(); + if (cur & 1) { + item_insert(&tree, start); + if (cur & 2) { + if (start <= end) + count++; + item_tag_set(&tree, start, 0); + } + } + if (cur & 4) { + item_insert(&tree, start-1); + if (cur & 8) + item_tag_set(&tree, start-1, 0); + } + if (cur & 16) { + item_insert(&tree, end); + if (cur & 32) { + if (start <= end) + count++; + item_tag_set(&tree, end, 0); + } + } + if (cur & 64) { + item_insert(&tree, end+1); + if (cur & 128) + item_tag_set(&tree, end+1, 0); + } + + for (i = 0; i < ITEMS; i++) { + do { + idx[i] = rand(); + } while (item_lookup(&tree, idx[i])); + + item_insert(&tree, idx[i]); + if (rand() & 1) { + item_tag_set(&tree, idx[i], 0); + if (idx[i] >= start && idx[i] <= end) + count++; + } +/* if (i % 1000 == 0) + putchar('.'); */ + } + +// printf("\ncopying tags...\n"); + cur = start; + tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, ITEMS, 0, 1); + +// printf("checking copied tags\n"); + assert(tagged == count); + check_copied_tags(&tree, start, end, idx, ITEMS, 0, 1); + + /* Copy tags in several rounds */ +// printf("\ncopying tags...\n"); + cur = start; + do { + tmp = rand() % (count/10+2); + tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, tmp, 0, 2); + } while (tmp == tagged); + +// printf("%lu %lu %lu\n", tagged, tmp, count); +// printf("checking copied tags\n"); + check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2); + assert(tagged < tmp); + verify_tag_consistency(&tree, 0); + verify_tag_consistency(&tree, 1); + verify_tag_consistency(&tree, 2); +// printf("\n"); + item_kill_tree(&tree); +} + +static void single_thread_tests(void) +{ + int i; + + tag_check(); + printf("after tag_check: %d allocated\n", nr_allocated); + gang_check(); + printf("after gang_check: %d allocated\n", nr_allocated); + add_and_check(); + printf("after add_and_check: %d allocated\n", nr_allocated); + dynamic_height_check(); + printf("after dynamic_height_check: %d allocated\n", nr_allocated); + big_gang_check(); + printf("after big_gang_check: %d allocated\n", nr_allocated); + for (i = 0; i < 2000; i++) { + copy_tag_check(); + printf("%d ", i); + fflush(stdout); + } + printf("after copy_tag_check: %d allocated\n", nr_allocated); +} + +int main(void) +{ + rcu_register_thread(); + radix_tree_init(); + + regression1_test(); + regression2_test(); + regression3_test(); + single_thread_tests(); + + sleep(1); + printf("after sleep(1): %d allocated\n", nr_allocated); + rcu_unregister_thread(); + + exit(0); +} diff --git a/tools/testing/radix-tree/rcupdate.c b/tools/testing/radix-tree/rcupdate.c new file mode 100644 index 000000000000..31a2d14225d6 --- /dev/null +++ b/tools/testing/radix-tree/rcupdate.c @@ -0,0 +1,86 @@ +#include <linux/rcupdate.h> +#include <pthread.h> +#include <stdio.h> +#include <assert.h> + +static pthread_mutex_t rculock = PTHREAD_MUTEX_INITIALIZER; +static struct rcu_head *rcuhead_global = NULL; +static __thread int nr_rcuhead = 0; +static __thread struct rcu_head *rcuhead = NULL; +static __thread struct rcu_head *rcutail = NULL; + +static pthread_cond_t rcu_worker_cond = PTHREAD_COND_INITIALIZER; + +/* switch to urcu implementation when it is merged. */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)) +{ + head->func = func; + head->next = rcuhead; + rcuhead = head; + if (!rcutail) + rcutail = head; + nr_rcuhead++; + if (nr_rcuhead >= 1000) { + int signal = 0; + + pthread_mutex_lock(&rculock); + if (!rcuhead_global) + signal = 1; + rcutail->next = rcuhead_global; + rcuhead_global = head; + pthread_mutex_unlock(&rculock); + + nr_rcuhead = 0; + rcuhead = NULL; + rcutail = NULL; + + if (signal) { + pthread_cond_signal(&rcu_worker_cond); + } + } +} + +static void *rcu_worker(void *arg) +{ + struct rcu_head *r; + + rcupdate_thread_init(); + + while (1) { + pthread_mutex_lock(&rculock); + while (!rcuhead_global) { + pthread_cond_wait(&rcu_worker_cond, &rculock); + } + r = rcuhead_global; + rcuhead_global = NULL; + + pthread_mutex_unlock(&rculock); + + synchronize_rcu(); + + while (r) { + struct rcu_head *tmp = r->next; + r->func(r); + r = tmp; + } + } + + rcupdate_thread_exit(); + + return NULL; +} + +static pthread_t worker_thread; +void rcupdate_init(void) +{ + pthread_create(&worker_thread, NULL, rcu_worker, NULL); +} + +void rcupdate_thread_init(void) +{ + rcu_register_thread(); +} +void rcupdate_thread_exit(void) +{ + rcu_unregister_thread(); +} diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h new file mode 100644 index 000000000000..e018c4816688 --- /dev/null +++ b/tools/testing/radix-tree/regression.h @@ -0,0 +1,8 @@ +#ifndef __REGRESSION_H__ +#define __REGRESSION_H__ + +void regression1_test(void); +void regression2_test(void); +void regression3_test(void); + +#endif diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c new file mode 100644 index 000000000000..2d03a63bb79c --- /dev/null +++ b/tools/testing/radix-tree/regression1.c @@ -0,0 +1,220 @@ +/* + * Regression1 + * Description: + * Salman Qazi describes the following radix-tree bug: + * + * In the following case, we get can get a deadlock: + * + * 0. The radix tree contains two items, one has the index 0. + * 1. The reader (in this case find_get_pages) takes the rcu_read_lock. + * 2. The reader acquires slot(s) for item(s) including the index 0 item. + * 3. The non-zero index item is deleted, and as a consequence the other item + * is moved to the root of the tree. The place where it used to be is queued + * for deletion after the readers finish. + * 3b. The zero item is deleted, removing it from the direct slot, it remains in + * the rcu-delayed indirect node. + * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref + * count + * 5. The reader looks at it again, hoping that the item will either be freed + * or the ref count will increase. This never happens, as the slot it is + * looking at will never be updated. Also, this slot can never be reclaimed + * because the reader is holding rcu_read_lock and is in an infinite loop. + * + * The fix is to re-use the same "indirect" pointer case that requires a slot + * lookup retry into a general "retry the lookup" bit. + * + * Running: + * This test should run to completion in a few seconds. The above bug would + * cause it to hang indefinitely. + * + * Upstream commit: + * Not yet + */ +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/radix-tree.h> +#include <linux/rcupdate.h> +#include <stdlib.h> +#include <pthread.h> +#include <stdio.h> +#include <assert.h> + +#include "regression.h" + +static RADIX_TREE(mt_tree, GFP_KERNEL); +static pthread_mutex_t mt_lock; + +struct page { + pthread_mutex_t lock; + struct rcu_head rcu; + int count; + unsigned long index; +}; + +static struct page *page_alloc(void) +{ + struct page *p; + p = malloc(sizeof(struct page)); + p->count = 1; + p->index = 1; + pthread_mutex_init(&p->lock, NULL); + + return p; +} + +static void page_rcu_free(struct rcu_head *rcu) +{ + struct page *p = container_of(rcu, struct page, rcu); + assert(!p->count); + pthread_mutex_destroy(&p->lock); + free(p); +} + +static void page_free(struct page *p) +{ + call_rcu(&p->rcu, page_rcu_free); +} + +static unsigned find_get_pages(unsigned long start, + unsigned int nr_pages, struct page **pages) +{ + unsigned int i; + unsigned int ret; + unsigned int nr_found; + + rcu_read_lock(); +restart: + nr_found = radix_tree_gang_lookup_slot(&mt_tree, + (void ***)pages, NULL, start, nr_pages); + ret = 0; + for (i = 0; i < nr_found; i++) { + struct page *page; +repeat: + page = radix_tree_deref_slot((void **)pages[i]); + if (unlikely(!page)) + continue; + + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + assert((start | i) == 0); + goto restart; + } + /* + * No exceptional entries are inserted in this test. + */ + assert(0); + } + + pthread_mutex_lock(&page->lock); + if (!page->count) { + pthread_mutex_unlock(&page->lock); + goto repeat; + } + /* don't actually update page refcount */ + pthread_mutex_unlock(&page->lock); + + /* Has the page moved? */ + if (unlikely(page != *((void **)pages[i]))) { + goto repeat; + } + + pages[ret] = page; + ret++; + } + rcu_read_unlock(); + return ret; +} + +static pthread_barrier_t worker_barrier; + +static void *regression1_fn(void *arg) +{ + rcu_register_thread(); + + if (pthread_barrier_wait(&worker_barrier) == + PTHREAD_BARRIER_SERIAL_THREAD) { + int j; + + for (j = 0; j < 1000000; j++) { + struct page *p; + + p = page_alloc(); + pthread_mutex_lock(&mt_lock); + radix_tree_insert(&mt_tree, 0, p); + pthread_mutex_unlock(&mt_lock); + + p = page_alloc(); + pthread_mutex_lock(&mt_lock); + radix_tree_insert(&mt_tree, 1, p); + pthread_mutex_unlock(&mt_lock); + + pthread_mutex_lock(&mt_lock); + p = radix_tree_delete(&mt_tree, 1); + pthread_mutex_lock(&p->lock); + p->count--; + pthread_mutex_unlock(&p->lock); + pthread_mutex_unlock(&mt_lock); + page_free(p); + + pthread_mutex_lock(&mt_lock); + p = radix_tree_delete(&mt_tree, 0); + pthread_mutex_lock(&p->lock); + p->count--; + pthread_mutex_unlock(&p->lock); + pthread_mutex_unlock(&mt_lock); + page_free(p); + } + } else { + int j; + + for (j = 0; j < 100000000; j++) { + struct page *pages[10]; + + find_get_pages(0, 10, pages); + } + } + + rcu_unregister_thread(); + + return NULL; +} + +static pthread_t *threads; +void regression1_test(void) +{ + int nr_threads; + int i; + long arg; + + /* Regression #1 */ + printf("running regression test 1, should finish in under a minute\n"); + nr_threads = 2; + pthread_barrier_init(&worker_barrier, NULL, nr_threads); + + threads = malloc(nr_threads * sizeof(pthread_t *)); + + for (i = 0; i < nr_threads; i++) { + arg = i; + if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) { + perror("pthread_create"); + exit(1); + } + } + + for (i = 0; i < nr_threads; i++) { + if (pthread_join(threads[i], NULL)) { + perror("pthread_join"); + exit(1); + } + } + + free(threads); + + printf("regression test 1, done\n"); +} diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c new file mode 100644 index 000000000000..5d2fa28cdca3 --- /dev/null +++ b/tools/testing/radix-tree/regression2.c @@ -0,0 +1,126 @@ +/* + * Regression2 + * Description: + * Toshiyuki Okajima describes the following radix-tree bug: + * + * In the following case, we can get a hangup on + * radix_radix_tree_gang_lookup_tag_slot. + * + * 0. The radix tree contains RADIX_TREE_MAP_SIZE items. And the tag of + * a certain item has PAGECACHE_TAG_DIRTY. + * 1. radix_tree_range_tag_if_tagged(, start, end, , PAGECACHE_TAG_DIRTY, + * PAGECACHE_TAG_TOWRITE) is called to add PAGECACHE_TAG_TOWRITE tag + * for the tag which has PAGECACHE_TAG_DIRTY. However, there is no tag with + * PAGECACHE_TAG_DIRTY within the range from start to end. As the result, + * There is no tag with PAGECACHE_TAG_TOWRITE but the root tag has + * PAGECACHE_TAG_TOWRITE. + * 2. An item is added into the radix tree and then the level of it is + * extended into 2 from 1. At that time, the new radix tree node succeeds + * the tag status of the root tag. Therefore the tag of the new radix tree + * node has PAGECACHE_TAG_TOWRITE but there is not slot with + * PAGECACHE_TAG_TOWRITE tag in the child node of the new radix tree node. + * 3. The tag of a certain item is cleared with PAGECACHE_TAG_DIRTY. + * 4. All items within the index range from 0 to RADIX_TREE_MAP_SIZE - 1 are + * released. (Only the item which index is RADIX_TREE_MAP_SIZE exist in the + * radix tree.) As the result, the slot of the radix tree node is NULL but + * the tag which corresponds to the slot has PAGECACHE_TAG_TOWRITE. + * 5. radix_tree_gang_lookup_tag_slot(PAGECACHE_TAG_TOWRITE) calls + * __lookup_tag. __lookup_tag returns with 0. And __lookup_tag doesn't + * change the index that is the input and output parameter. Because the 1st + * slot of the radix tree node is NULL, but the tag which corresponds to + * the slot has PAGECACHE_TAG_TOWRITE. + * Therefore radix_tree_gang_lookup_tag_slot tries to get some items by + * calling __lookup_tag, but it cannot get any items forever. + * + * The fix is to change that radix_tree_tag_if_tagged doesn't tag the root tag + * if it doesn't set any tags within the specified range. + * + * Running: + * This test should run to completion immediately. The above bug would cause it + * to hang indefinitely. + * + * Upstream commit: + * Not yet + */ +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/radix-tree.h> +#include <stdlib.h> +#include <stdio.h> + +#include "regression.h" + +#ifdef __KERNEL__ +#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#else +#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ +#endif + +#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) +#define PAGECACHE_TAG_DIRTY 0 +#define PAGECACHE_TAG_WRITEBACK 1 +#define PAGECACHE_TAG_TOWRITE 2 + +static RADIX_TREE(mt_tree, GFP_KERNEL); +unsigned long page_count = 0; + +struct page { + unsigned long index; +}; + +static struct page *page_alloc(void) +{ + struct page *p; + p = malloc(sizeof(struct page)); + p->index = page_count++; + + return p; +} + +void regression2_test(void) +{ + int i; + struct page *p; + int max_slots = RADIX_TREE_MAP_SIZE; + unsigned long int start, end; + struct page *pages[1]; + + printf("running regression test 2 (should take milliseconds)\n"); + /* 0. */ + for (i = 0; i <= max_slots - 1; i++) { + p = page_alloc(); + radix_tree_insert(&mt_tree, i, p); + } + radix_tree_tag_set(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY); + + /* 1. */ + start = 0; + end = max_slots - 2; + radix_tree_range_tag_if_tagged(&mt_tree, &start, end, 1, + PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); + + /* 2. */ + p = page_alloc(); + radix_tree_insert(&mt_tree, max_slots, p); + + /* 3. */ + radix_tree_tag_clear(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY); + + /* 4. */ + for (i = max_slots - 1; i >= 0; i--) + radix_tree_delete(&mt_tree, i); + + /* 5. */ + // NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot + // can return. + start = 1; + end = max_slots - 2; + radix_tree_gang_lookup_tag_slot(&mt_tree, (void ***)pages, start, end, + PAGECACHE_TAG_TOWRITE); + + /* We remove all the remained nodes */ + radix_tree_delete(&mt_tree, max_slots); + + printf("regression test 2, done\n"); +} diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c new file mode 100644 index 000000000000..1f06ed73d0a8 --- /dev/null +++ b/tools/testing/radix-tree/regression3.c @@ -0,0 +1,117 @@ +/* + * Regression3 + * Description: + * Helper radix_tree_iter_retry resets next_index to the current index. + * In following radix_tree_next_slot current chunk size becomes zero. + * This isn't checked and it tries to dereference null pointer in slot. + * + * Helper radix_tree_iter_next reset slot to NULL and next_index to index + 1, + * for tagger iteraction it also must reset cached tags in iterator to abort + * next radix_tree_next_slot and go to slow-path into radix_tree_next_chunk. + * + * Running: + * This test should run to completion immediately. The above bug would + * cause it to segfault. + * + * Upstream commit: + * Not yet + */ +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/radix-tree.h> +#include <stdlib.h> +#include <stdio.h> + +#include "regression.h" + +void regression3_test(void) +{ + RADIX_TREE(root, GFP_KERNEL); + void *ptr0 = (void *)4ul; + void *ptr = (void *)8ul; + struct radix_tree_iter iter; + void **slot; + bool first; + + printf("running regression test 3 (should take milliseconds)\n"); + + radix_tree_insert(&root, 0, ptr0); + radix_tree_tag_set(&root, 0, 0); + + first = true; + radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) { + printf("tagged %ld %p\n", iter.index, *slot); + if (first) { + radix_tree_insert(&root, 1, ptr); + radix_tree_tag_set(&root, 1, 0); + first = false; + } + if (radix_tree_deref_retry(*slot)) { + printf("retry at %ld\n", iter.index); + slot = radix_tree_iter_retry(&iter); + continue; + } + } + radix_tree_delete(&root, 1); + + first = true; + radix_tree_for_each_slot(slot, &root, &iter, 0) { + printf("slot %ld %p\n", iter.index, *slot); + if (first) { + radix_tree_insert(&root, 1, ptr); + first = false; + } + if (radix_tree_deref_retry(*slot)) { + printk("retry at %ld\n", iter.index); + slot = radix_tree_iter_retry(&iter); + continue; + } + } + radix_tree_delete(&root, 1); + + first = true; + radix_tree_for_each_contig(slot, &root, &iter, 0) { + printk("contig %ld %p\n", iter.index, *slot); + if (first) { + radix_tree_insert(&root, 1, ptr); + first = false; + } + if (radix_tree_deref_retry(*slot)) { + printk("retry at %ld\n", iter.index); + slot = radix_tree_iter_retry(&iter); + continue; + } + } + + radix_tree_for_each_slot(slot, &root, &iter, 0) { + printf("slot %ld %p\n", iter.index, *slot); + if (!iter.index) { + printf("next at %ld\n", iter.index); + slot = radix_tree_iter_next(&iter); + } + } + + radix_tree_for_each_contig(slot, &root, &iter, 0) { + printf("contig %ld %p\n", iter.index, *slot); + if (!iter.index) { + printf("next at %ld\n", iter.index); + slot = radix_tree_iter_next(&iter); + } + } + + radix_tree_tag_set(&root, 0, 0); + radix_tree_tag_set(&root, 1, 0); + radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) { + printf("tagged %ld %p\n", iter.index, *slot); + if (!iter.index) { + printf("next at %ld\n", iter.index); + slot = radix_tree_iter_next(&iter); + } + } + + radix_tree_delete(&root, 0); + radix_tree_delete(&root, 1); + + printf("regression test 3 passed\n"); +} diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c new file mode 100644 index 000000000000..83136be552a0 --- /dev/null +++ b/tools/testing/radix-tree/tag_check.c @@ -0,0 +1,332 @@ +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include <linux/slab.h> +#include <linux/radix-tree.h> + +#include "test.h" + + +static void +__simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) +{ + int ret; + + item_check_absent(tree, index); + assert(item_tag_get(tree, index, tag) == 0); + + item_insert(tree, index); + assert(item_tag_get(tree, index, tag) == 0); + item_tag_set(tree, index, tag); + ret = item_tag_get(tree, index, tag); + assert(ret != 0); + ret = item_delete(tree, index); + assert(ret != 0); + item_insert(tree, index); + ret = item_tag_get(tree, index, tag); + assert(ret == 0); + ret = item_delete(tree, index); + assert(ret != 0); + ret = item_delete(tree, index); + assert(ret == 0); +} + +void simple_checks(void) +{ + unsigned long index; + RADIX_TREE(tree, GFP_KERNEL); + + for (index = 0; index < 10000; index++) { + __simple_checks(&tree, index, 0); + __simple_checks(&tree, index, 1); + } + verify_tag_consistency(&tree, 0); + verify_tag_consistency(&tree, 1); + printf("before item_kill_tree: %d allocated\n", nr_allocated); + item_kill_tree(&tree); + printf("after item_kill_tree: %d allocated\n", nr_allocated); +} + +/* + * Check that tags propagate correctly when extending a tree. + */ +static void extend_checks(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 43); + assert(item_tag_get(&tree, 43, 0) == 0); + item_tag_set(&tree, 43, 0); + assert(item_tag_get(&tree, 43, 0) == 1); + item_insert(&tree, 1000000); + assert(item_tag_get(&tree, 43, 0) == 1); + + item_insert(&tree, 0); + item_tag_set(&tree, 0, 0); + item_delete(&tree, 1000000); + assert(item_tag_get(&tree, 43, 0) != 0); + item_delete(&tree, 43); + assert(item_tag_get(&tree, 43, 0) == 0); /* crash */ + assert(item_tag_get(&tree, 0, 0) == 1); + + verify_tag_consistency(&tree, 0); + + item_kill_tree(&tree); +} + +/* + * Check that tags propagate correctly when contracting a tree. + */ +static void contract_checks(void) +{ + struct item *item; + int tmp; + RADIX_TREE(tree, GFP_KERNEL); + + tmp = 1<<RADIX_TREE_MAP_SHIFT; + item_insert(&tree, tmp); + item_insert(&tree, tmp+1); + item_tag_set(&tree, tmp, 0); + item_tag_set(&tree, tmp, 1); + item_tag_set(&tree, tmp+1, 0); + item_delete(&tree, tmp+1); + item_tag_clear(&tree, tmp, 1); + + assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 0) == 1); + assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 1) == 0); + + assert(item_tag_get(&tree, tmp, 0) == 1); + assert(item_tag_get(&tree, tmp, 1) == 0); + + verify_tag_consistency(&tree, 0); + item_kill_tree(&tree); +} + +/* + * Stupid tag thrasher + * + * Create a large linear array corresponding to the tree. Each element in + * the array is coherent with each node in the tree + */ + +enum { + NODE_ABSENT = 0, + NODE_PRESENT = 1, + NODE_TAGGED = 2, +}; + +#define THRASH_SIZE 1000 * 1000 +#define N 127 +#define BATCH 33 + +static void gang_check(struct radix_tree_root *tree, + char *thrash_state, int tag) +{ + struct item *items[BATCH]; + int nr_found; + unsigned long index = 0; + unsigned long last_index = 0; + + while ((nr_found = radix_tree_gang_lookup_tag(tree, (void **)items, + index, BATCH, tag))) { + int i; + + for (i = 0; i < nr_found; i++) { + struct item *item = items[i]; + + while (last_index < item->index) { + assert(thrash_state[last_index] != NODE_TAGGED); + last_index++; + } + assert(thrash_state[last_index] == NODE_TAGGED); + last_index++; + } + index = items[nr_found - 1]->index + 1; + } +} + +static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag) +{ + int insert_chunk; + int delete_chunk; + int tag_chunk; + int untag_chunk; + int total_tagged = 0; + int total_present = 0; + + for (insert_chunk = 1; insert_chunk < THRASH_SIZE; insert_chunk *= N) + for (delete_chunk = 1; delete_chunk < THRASH_SIZE; delete_chunk *= N) + for (tag_chunk = 1; tag_chunk < THRASH_SIZE; tag_chunk *= N) + for (untag_chunk = 1; untag_chunk < THRASH_SIZE; untag_chunk *= N) { + int i; + unsigned long index; + int nr_inserted = 0; + int nr_deleted = 0; + int nr_tagged = 0; + int nr_untagged = 0; + int actual_total_tagged; + int actual_total_present; + + for (i = 0; i < insert_chunk; i++) { + index = rand() % THRASH_SIZE; + if (thrash_state[index] != NODE_ABSENT) + continue; + item_check_absent(tree, index); + item_insert(tree, index); + assert(thrash_state[index] != NODE_PRESENT); + thrash_state[index] = NODE_PRESENT; + nr_inserted++; + total_present++; + } + + for (i = 0; i < delete_chunk; i++) { + index = rand() % THRASH_SIZE; + if (thrash_state[index] == NODE_ABSENT) + continue; + item_check_present(tree, index); + if (item_tag_get(tree, index, tag)) { + assert(thrash_state[index] == NODE_TAGGED); + total_tagged--; + } else { + assert(thrash_state[index] == NODE_PRESENT); + } + item_delete(tree, index); + assert(thrash_state[index] != NODE_ABSENT); + thrash_state[index] = NODE_ABSENT; + nr_deleted++; + total_present--; + } + + for (i = 0; i < tag_chunk; i++) { + index = rand() % THRASH_SIZE; + if (thrash_state[index] != NODE_PRESENT) { + if (item_lookup(tree, index)) + assert(item_tag_get(tree, index, tag)); + continue; + } + item_tag_set(tree, index, tag); + item_tag_set(tree, index, tag); + assert(thrash_state[index] != NODE_TAGGED); + thrash_state[index] = NODE_TAGGED; + nr_tagged++; + total_tagged++; + } + + for (i = 0; i < untag_chunk; i++) { + index = rand() % THRASH_SIZE; + if (thrash_state[index] != NODE_TAGGED) + continue; + item_check_present(tree, index); + assert(item_tag_get(tree, index, tag)); + item_tag_clear(tree, index, tag); + item_tag_clear(tree, index, tag); + assert(thrash_state[index] != NODE_PRESENT); + thrash_state[index] = NODE_PRESENT; + nr_untagged++; + total_tagged--; + } + + actual_total_tagged = 0; + actual_total_present = 0; + for (index = 0; index < THRASH_SIZE; index++) { + switch (thrash_state[index]) { + case NODE_ABSENT: + item_check_absent(tree, index); + break; + case NODE_PRESENT: + item_check_present(tree, index); + assert(!item_tag_get(tree, index, tag)); + actual_total_present++; + break; + case NODE_TAGGED: + item_check_present(tree, index); + assert(item_tag_get(tree, index, tag)); + actual_total_present++; + actual_total_tagged++; + break; + } + } + + gang_check(tree, thrash_state, tag); + + printf("%d(%d) %d(%d) %d(%d) %d(%d) / " + "%d(%d) present, %d(%d) tagged\n", + insert_chunk, nr_inserted, + delete_chunk, nr_deleted, + tag_chunk, nr_tagged, + untag_chunk, nr_untagged, + total_present, actual_total_present, + total_tagged, actual_total_tagged); + } +} + +static void thrash_tags(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + char *thrash_state; + + thrash_state = malloc(THRASH_SIZE); + memset(thrash_state, 0, THRASH_SIZE); + + do_thrash(&tree, thrash_state, 0); + + verify_tag_consistency(&tree, 0); + item_kill_tree(&tree); + free(thrash_state); +} + +static void leak_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 1000000); + item_delete(&tree, 1000000); + item_kill_tree(&tree); +} + +static void __leak_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); + item_insert(&tree, 1000000); + printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); + item_delete(&tree, 1000000); + printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); + item_kill_tree(&tree); + printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); +} + +static void single_check(void) +{ + struct item *items[BATCH]; + RADIX_TREE(tree, GFP_KERNEL); + int ret; + + item_insert(&tree, 0); + item_tag_set(&tree, 0, 0); + ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0); + assert(ret == 1); + ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0); + assert(ret == 0); + verify_tag_consistency(&tree, 0); + verify_tag_consistency(&tree, 1); + item_kill_tree(&tree); +} + +void tag_check(void) +{ + single_check(); + extend_checks(); + contract_checks(); + printf("after extend_checks: %d allocated\n", nr_allocated); + __leak_check(); + leak_check(); + printf("after leak_check: %d allocated\n", nr_allocated); + simple_checks(); + printf("after simple_checks: %d allocated\n", nr_allocated); + thrash_tags(); + printf("after thrash_tags: %d allocated\n", nr_allocated); +} diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c new file mode 100644 index 000000000000..2bebf34cdc27 --- /dev/null +++ b/tools/testing/radix-tree/test.c @@ -0,0 +1,219 @@ +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "test.h" + +struct item * +item_tag_set(struct radix_tree_root *root, unsigned long index, int tag) +{ + return radix_tree_tag_set(root, index, tag); +} + +struct item * +item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag) +{ + return radix_tree_tag_clear(root, index, tag); +} + +int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag) +{ + return radix_tree_tag_get(root, index, tag); +} + +int __item_insert(struct radix_tree_root *root, struct item *item) +{ + return radix_tree_insert(root, item->index, item); +} + +int item_insert(struct radix_tree_root *root, unsigned long index) +{ + return __item_insert(root, item_create(index)); +} + +int item_delete(struct radix_tree_root *root, unsigned long index) +{ + struct item *item = radix_tree_delete(root, index); + + if (item) { + assert(item->index == index); + free(item); + return 1; + } + return 0; +} + +struct item *item_create(unsigned long index) +{ + struct item *ret = malloc(sizeof(*ret)); + + ret->index = index; + return ret; +} + +void item_check_present(struct radix_tree_root *root, unsigned long index) +{ + struct item *item; + + item = radix_tree_lookup(root, index); + assert(item != 0); + assert(item->index == index); +} + +struct item *item_lookup(struct radix_tree_root *root, unsigned long index) +{ + return radix_tree_lookup(root, index); +} + +void item_check_absent(struct radix_tree_root *root, unsigned long index) +{ + struct item *item; + + item = radix_tree_lookup(root, index); + assert(item == 0); +} + +/* + * Scan only the passed (start, start+nr] for present items + */ +void item_gang_check_present(struct radix_tree_root *root, + unsigned long start, unsigned long nr, + int chunk, int hop) +{ + struct item *items[chunk]; + unsigned long into; + + for (into = 0; into < nr; ) { + int nfound; + int nr_to_find = chunk; + int i; + + if (nr_to_find > (nr - into)) + nr_to_find = nr - into; + + nfound = radix_tree_gang_lookup(root, (void **)items, + start + into, nr_to_find); + assert(nfound == nr_to_find); + for (i = 0; i < nfound; i++) + assert(items[i]->index == start + into + i); + into += hop; + } +} + +/* + * Scan the entire tree, only expecting present items (start, start+nr] + */ +void item_full_scan(struct radix_tree_root *root, unsigned long start, + unsigned long nr, int chunk) +{ + struct item *items[chunk]; + unsigned long into = 0; + unsigned long this_index = start; + int nfound; + int i; + +// printf("%s(0x%08lx, 0x%08lx, %d)\n", __FUNCTION__, start, nr, chunk); + + while ((nfound = radix_tree_gang_lookup(root, (void **)items, into, + chunk))) { +// printf("At 0x%08lx, nfound=%d\n", into, nfound); + for (i = 0; i < nfound; i++) { + assert(items[i]->index == this_index); + this_index++; + } +// printf("Found 0x%08lx->0x%08lx\n", +// items[0]->index, items[nfound-1]->index); + into = this_index; + } + if (chunk) + assert(this_index == start + nr); + nfound = radix_tree_gang_lookup(root, (void **)items, + this_index, chunk); + assert(nfound == 0); +} + +static int verify_node(struct radix_tree_node *slot, unsigned int tag, + unsigned int height, int tagged) +{ + int anyset = 0; + int i; + int j; + + slot = indirect_to_ptr(slot); + + /* Verify consistency at this level */ + for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) { + if (slot->tags[tag][i]) { + anyset = 1; + break; + } + } + if (tagged != anyset) { + printf("tag: %u, height %u, tagged: %d, anyset: %d\n", tag, height, tagged, anyset); + for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) { + printf("tag %d: ", j); + for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) + printf("%016lx ", slot->tags[j][i]); + printf("\n"); + } + return 1; + } + assert(tagged == anyset); + + /* Go for next level */ + if (height > 1) { + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) + if (slot->slots[i]) + if (verify_node(slot->slots[i], tag, height - 1, + !!test_bit(i, slot->tags[tag]))) { + printf("Failure at off %d\n", i); + for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) { + printf("tag %d: ", j); + for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) + printf("%016lx ", slot->tags[j][i]); + printf("\n"); + } + return 1; + } + } + return 0; +} + +void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) +{ + if (!root->height) + return; + verify_node(root->rnode, tag, root->height, !!root_tag_get(root, tag)); +} + +void item_kill_tree(struct radix_tree_root *root) +{ + struct item *items[32]; + int nfound; + + while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) { + int i; + + for (i = 0; i < nfound; i++) { + void *ret; + + ret = radix_tree_delete(root, items[i]->index); + assert(ret == items[i]); + free(items[i]); + } + } + assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0); + assert(root->rnode == NULL); +} + +void tree_verify_min_height(struct radix_tree_root *root, int maxindex) +{ + assert(radix_tree_maxindex(root->height) >= maxindex); + if (root->height > 1) + assert(radix_tree_maxindex(root->height-1) < maxindex); + else if (root->height == 1) + assert(radix_tree_maxindex(root->height-1) <= maxindex); +} diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h new file mode 100644 index 000000000000..4e1d95faaa94 --- /dev/null +++ b/tools/testing/radix-tree/test.h @@ -0,0 +1,40 @@ +#include <linux/gfp.h> +#include <linux/types.h> +#include <linux/radix-tree.h> +#include <linux/rcupdate.h> + +struct item { + unsigned long index; +}; + +struct item *item_create(unsigned long index); +int __item_insert(struct radix_tree_root *root, struct item *item); +int item_insert(struct radix_tree_root *root, unsigned long index); +int item_delete(struct radix_tree_root *root, unsigned long index); +struct item *item_lookup(struct radix_tree_root *root, unsigned long index); + +void item_check_present(struct radix_tree_root *root, unsigned long index); +void item_check_absent(struct radix_tree_root *root, unsigned long index); +void item_gang_check_present(struct radix_tree_root *root, + unsigned long start, unsigned long nr, + int chunk, int hop); +void item_full_scan(struct radix_tree_root *root, unsigned long start, + unsigned long nr, int chunk); +void item_kill_tree(struct radix_tree_root *root); + +void tag_check(void); + +struct item * +item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); +struct item * +item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag); +int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag); +void tree_verify_min_height(struct radix_tree_root *root, int maxindex); +void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag); + +extern int nr_allocated; + +/* Normally private parts of lib/radix-tree.c */ +void *indirect_to_ptr(void *ptr); +int root_tag_get(struct radix_tree_root *root, unsigned int tag); +unsigned long radix_tree_maxindex(unsigned int height); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b04afc3295df..ff9e5f20a5a7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -19,6 +19,7 @@ TARGETS += powerpc TARGETS += pstore TARGETS += ptrace TARGETS += seccomp +TARGETS += sigaltstack TARGETS += size TARGETS += static_keys TARGETS += sysctl diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5d8cd06d920f..c37262f6c269 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -14,3 +14,12 @@ enable_tracing() { # start trace recording reset_tracer() { # reset the current tracer echo nop > current_tracer } + +reset_trigger() { # reset all current setting triggers + grep -v ^# events/*/*/trigger | + while read line; do + cmd=`echo $line | cut -f2- -d: | cut -f1 -d" "` + echo "!$cmd" > `echo $line | cut -f1 -d:` + done +} + diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc new file mode 100644 index 000000000000..1a9445021bf1 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc @@ -0,0 +1,64 @@ +#!/bin/sh +# description: event trigger - test event enable/disable trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep enable_event events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "event enable/disable trigger is not supported" + exit_unsupported +fi + +echo "Test enable_event trigger" +echo 0 > events/sched/sched_switch/enable +echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat events/sched/sched_switch/enable` != '1*' ]; then + fail "enable_event trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test disable_event trigger" +echo 1 > events/sched/sched_switch/enable +echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat events/sched/sched_switch/enable` != '0*' ]; then + fail "disable_event trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for event enable/disable trigger" +! echo 'enable_event:nogroup:noevent' > events/sched/sched_process_fork/trigger +! echo 'disable_event+1' > events/sched/sched_process_fork/trigger +echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc new file mode 100644 index 000000000000..514e466e198b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc @@ -0,0 +1,59 @@ +#!/bin/sh +# description: event trigger - test trigger filter + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test trigger filter" +echo 1 > tracing_on +echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 1 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for trigger filter" +! echo 'traceoff if a' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid=0' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid==b' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid == 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid == child_pid' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid <= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if parent_pid >= 0 && child_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger + + + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc new file mode 100644 index 000000000000..c2b61c4fda11 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -0,0 +1,75 @@ +#!/bin/sh +# description: event trigger - test histogram modifiers + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +echo "Test histogram with execname modifier" + +echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +COMM=`cat /proc/$$/comm` +grep "common_pid: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "execname modifier on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with hex modifier" + +echo 'hist:keys=parent_pid.hex' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +# Note that $$ is the parent pid. $PID is current PID. +HEX=`printf %x $PID` +grep "parent_pid: $HEX" events/sched/sched_process_fork/hist > /dev/null || \ + fail "hex modifier on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with syscall modifier" + +echo 'hist:keys=id.syscall' > events/raw_syscalls/sys_exit/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep "id: sys_" events/raw_syscalls/sys_exit/hist > /dev/null || \ + fail "syscall modifier on raw_syscalls/sys_exit did not work" + + +reset_trigger + +echo "Test histgram with log2 modifier" + +echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \ + fail "log2 modifier on kmem/kmalloc did not work" + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc new file mode 100644 index 000000000000..b2902d42a537 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -0,0 +1,83 @@ +#!/bin/sh +# description: event trigger - test histogram trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +echo "Test histogram basic tigger" + +echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +grep child events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with compound keys" + +echo 'hist:keys=parent_pid,child_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep '^{ parent_pid:.*, child_pid:.*}' events/sched/sched_process_fork/hist > /dev/null || \ + fail "compound keys on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with string key" + +echo 'hist:keys=parent_comm' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +COMM=`cat /proc/$$/comm` +grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "string key on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with sort key" + +echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done + +check_inc() { + while [ $# -gt 1 ]; do + [ $1 -gt $2 ] && return 1 + shift 1 + done + return 0 +} +check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \ + events/sched/sched_process_fork/hist | cut -d: -f2 ` || + fail "sort param on sched_process_fork did not work" + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc new file mode 100644 index 000000000000..03c4a46561fc --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -0,0 +1,73 @@ +#!/bin/sh +# description: event trigger - test multiple histogram triggers + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +reset_trigger + +echo "Test histogram multiple tiggers" + +echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger +echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +grep child events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +COMM=`cat /proc/$$/comm` +grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "string key on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with its name" + +echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep test_hist events/sched/sched_process_fork/hist > /dev/null || \ + fail "named event on sched_process_fork did not work" + +echo "Test same named histogram on different events" + +echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_exit/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep test_hist events/sched/sched_process_exit/hist > /dev/null || \ + fail "named event on sched_process_fork did not work" + +diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l` +test $diffs -eq 0 || fail "Same name histograms are not same" + +reset_trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc new file mode 100644 index 000000000000..f84b80d551a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -0,0 +1,56 @@ +#!/bin/sh +# description: event trigger - test snapshot-trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep snapshot events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "snapshot trigger is not supported" + exit_unsupported +fi + +echo "Test snapshot tigger" +echo 0 > snapshot +echo 1 > events/sched/sched_process_fork/enable +( echo "forked") +echo 'snapshot:1' > events/sched/sched_process_fork/trigger +( echo "forked") +grep sched_process_fork snapshot > /dev/null || \ + fail "snapshot trigger on sched_process_fork did not work" + +reset_trigger +echo 0 > snapshot +echo 0 > events/sched/sched_process_fork/enable + +echo "Test snapshot semantic errors" + +! echo "snapshot+1" > events/sched/sched_process_fork/trigger +echo "snapshot" > events/sched/sched_process_fork/trigger +! echo "snapshot" > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc new file mode 100644 index 000000000000..9fa23b085def --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc @@ -0,0 +1,53 @@ +#!/bin/sh +# description: event trigger - test stacktrace-trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "stacktrace trigger is not supported" + exit_unsupported +fi + +echo "Test stacktrace tigger" +echo 0 > trace +echo 0 > options/stacktrace +echo 'stacktrace' > events/sched/sched_process_fork/trigger +( echo "forked") +grep "<stack trace>" trace > /dev/null || \ + fail "stacktrace trigger on sched_process_fork did not work" + +reset_trigger + +echo "Test stacktrace semantic errors" + +! echo "stacktrace:foo" > events/sched/sched_process_fork/trigger +echo "stacktrace" > events/sched/sched_process_fork/trigger +! echo "stacktrace" > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc new file mode 100644 index 000000000000..87648e5f987c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc @@ -0,0 +1,58 @@ +#!/bin/sh +# description: event trigger - test traceon/off trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test traceoff trigger" +echo 1 > tracing_on +echo 'traceoff' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 0 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test traceon trigger" +echo 0 > tracing_on +echo 'traceon' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 1 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for traceoff/on trigger" +! echo 'traceoff:badparam' > events/sched/sched_process_fork/trigger +! echo 'traceoff+0' > events/sched/sched_process_fork/trigger +echo 'traceon' > events/sched/sched_process_fork/trigger +! echo 'traceon' > events/sched/sched_process_fork/trigger +! echo 'traceoff' > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index 47147b968514..08360060ab14 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -3,6 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh +TEST_PROGS := printf.sh bitmap.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh new file mode 100755 index 000000000000..2da187b6ddad --- /dev/null +++ b/tools/testing/selftests/lib/bitmap.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs bitmap infrastructure tests using test_bitmap kernel module + +if /sbin/modprobe -q test_bitmap; then + /sbin/modprobe -q -r test_bitmap + echo "bitmap: ok" +else + echo "bitmap: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 6fb23366b258..0840684deb7d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,3 +2,5 @@ socket psock_fanout psock_tpacket reuseport_bpf +reuseport_bpf_cpu +reuseport_dualstack diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 41449b5ad0a9..0e5340742620 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf +NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_dualstack all: $(NET_PROGS) %: %.c diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index bec1b5dd2530..96ba386b1b7b 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -9,10 +9,12 @@ #include <errno.h> #include <error.h> +#include <fcntl.h> #include <linux/bpf.h> #include <linux/filter.h> #include <linux/unistd.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -169,9 +171,15 @@ static void build_recv_group(const struct test_params p, int fd[], uint16_t mod, if (bind(fd[i], addr, sockaddr_size())) error(1, errno, "failed to bind recv socket %d", i); - if (p.protocol == SOCK_STREAM) + if (p.protocol == SOCK_STREAM) { + opt = 4; + if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt, + sizeof(opt))) + error(1, errno, + "failed to set TCP_FASTOPEN on %d", i); if (listen(fd[i], p.recv_socks * 10)) error(1, errno, "failed to listen on socket"); + } } free(addr); } @@ -189,10 +197,8 @@ static void send_from(struct test_params p, uint16_t sport, char *buf, if (bind(fd, saddr, sockaddr_size())) error(1, errno, "failed to bind send socket"); - if (connect(fd, daddr, sockaddr_size())) - error(1, errno, "failed to connect"); - if (send(fd, buf, len, 0) < 0) + if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0) error(1, errno, "failed to send message"); close(fd); @@ -260,7 +266,7 @@ static void test_recv_order(const struct test_params p, int fd[], int mod) } } -static void test_reuseport_ebpf(const struct test_params p) +static void test_reuseport_ebpf(struct test_params p) { int i, fd[p.recv_socks]; @@ -268,6 +274,7 @@ static void test_reuseport_ebpf(const struct test_params p) build_recv_group(p, fd, p.recv_socks, attach_ebpf); test_recv_order(p, fd, p.recv_socks); + p.send_port_min += p.recv_socks * 2; fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); attach_ebpf(fd[0], p.recv_socks / 2); test_recv_order(p, fd, p.recv_socks / 2); @@ -276,7 +283,7 @@ static void test_reuseport_ebpf(const struct test_params p) close(fd[i]); } -static void test_reuseport_cbpf(const struct test_params p) +static void test_reuseport_cbpf(struct test_params p) { int i, fd[p.recv_socks]; @@ -284,6 +291,7 @@ static void test_reuseport_cbpf(const struct test_params p) build_recv_group(p, fd, p.recv_socks, attach_cbpf); test_recv_order(p, fd, p.recv_socks); + p.send_port_min += p.recv_socks * 2; fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); attach_cbpf(fd[0], p.recv_socks / 2); test_recv_order(p, fd, p.recv_socks / 2); @@ -377,7 +385,7 @@ static void test_filter_no_reuseport(const struct test_params p) static void test_filter_without_bind(void) { - int fd1, fd2; + int fd1, fd2, opt = 1; fprintf(stderr, "Testing filter add without bind...\n"); fd1 = socket(AF_INET, SOCK_DGRAM, 0); @@ -386,6 +394,10 @@ static void test_filter_without_bind(void) fd2 = socket(AF_INET, SOCK_DGRAM, 0); if (fd2 < 0) error(1, errno, "failed to create socket 2"); + if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 1"); + if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT on socket 2"); attach_ebpf(fd1, 10); attach_cbpf(fd2, 10); @@ -394,6 +406,32 @@ static void test_filter_without_bind(void) close(fd2); } +void enable_fastopen(void) +{ + int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0); + int rw_mask = 3; /* bit 1: client side; bit-2 server side */ + int val, size; + char buf[16]; + + if (fd < 0) + error(1, errno, "Unable to open tcp_fastopen sysctl"); + if (read(fd, buf, sizeof(buf)) <= 0) + error(1, errno, "Unable to read tcp_fastopen sysctl"); + val = atoi(buf); + close(fd); + + if ((val & rw_mask) != rw_mask) { + fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR); + if (fd < 0) + error(1, errno, + "Unable to open tcp_fastopen sysctl for writing"); + val |= rw_mask; + size = snprintf(buf, 16, "%d", val); + if (write(fd, buf, size) <= 0) + error(1, errno, "Unable to write tcp_fastopen sysctl"); + close(fd); + } +} int main(void) { @@ -506,6 +544,71 @@ int main(void) .recv_port = 8007, .send_port_min = 9100}); + /* TCP fastopen is required for the TCP tests */ + enable_fastopen(); + fprintf(stderr, "---- IPv4 TCP ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8008, + .send_port_min = 9120}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8009, + .send_port_min = 9160}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_port = 8010}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_port = 8011}); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8012, + .send_port_min = 9200}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8013, + .send_port_min = 9240}); + test_extra_filter((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_port = 8014}); + test_filter_no_reuseport((struct test_params) { + .recv_family = AF_INET6, + .protocol = SOCK_STREAM, + .recv_port = 8015}); + + fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8016, + .send_port_min = 9320}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_STREAM, + .recv_socks = 10, + .recv_port = 8017, + .send_port_min = 9360}); test_filter_without_bind(); diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c new file mode 100644 index 000000000000..b23d6f54de7b --- /dev/null +++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c @@ -0,0 +1,258 @@ +/* + * Test functionality of BPF filters with SO_REUSEPORT. This program creates + * an SO_REUSEPORT receiver group containing one socket per CPU core. It then + * creates a BPF program that will select a socket from this group based + * on the core id that receives the packet. The sending code artificially + * moves itself to run on different core ids and sends one message from + * each core. Since these packets are delivered over loopback, they should + * arrive on the same core that sent them. The receiving code then ensures + * that the packet was received on the socket for the corresponding core id. + * This entire process is done for several different core id permutations + * and for each IPv4/IPv6 and TCP/UDP combination. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const int PORT = 8888; + +static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + size_t i; + int opt; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < len; ++i) { + rcv_fd[i] = socket(family, proto, 0); + if (rcv_fd[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void attach_bpf(int fd) +{ + struct sock_filter code[] = { + /* A = raw_smp_processor_id() */ + { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU }, + /* return A */ + { BPF_RET | BPF_A, 0, 0, 0 }, + }; + struct sock_fprog p = { + .len = 2, + .filter = code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF"); +} + +static void send_from_cpu(int cpu_id, int family, int proto) +{ + struct sockaddr_storage saddr, daddr; + struct sockaddr_in *saddr4, *daddr4; + struct sockaddr_in6 *saddr6, *daddr6; + cpu_set_t cpu_set; + int fd; + + switch (family) { + case AF_INET: + saddr4 = (struct sockaddr_in *)&saddr; + saddr4->sin_family = AF_INET; + saddr4->sin_addr.s_addr = htonl(INADDR_ANY); + saddr4->sin_port = 0; + + daddr4 = (struct sockaddr_in *)&daddr; + daddr4->sin_family = AF_INET; + daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr4->sin_port = htons(PORT); + break; + case AF_INET6: + saddr6 = (struct sockaddr_in6 *)&saddr; + saddr6->sin6_family = AF_INET6; + saddr6->sin6_addr = in6addr_any; + saddr6->sin6_port = 0; + + daddr6 = (struct sockaddr_in6 *)&daddr; + daddr6->sin6_family = AF_INET6; + daddr6->sin6_addr = in6addr_loopback; + daddr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + memset(&cpu_set, 0, sizeof(cpu_set)); + CPU_SET(cpu_id, &cpu_set); + if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) + error(1, errno, "failed to pin to cpu"); + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static +void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + for (i = 0; i < len; ++i) + if (ev.data.fd == rcv_fd[i]) + break; + if (i == len) + error(1, 0, "failed to find socket"); + fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i); + if (cpu_id != i) + error(1, 0, "cpu id/receive socket mismatch"); +} + +static void test(int *rcv_fd, int len, int family, int proto) +{ + struct epoll_event ev; + int epfd, cpu; + + build_rcv_group(rcv_fd, len, family, proto); + attach_bpf(rcv_fd[0]); + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + for (cpu = 0; cpu < len; ++cpu) { + ev.events = EPOLLIN; + ev.data.fd = rcv_fd[cpu]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + /* Forward iterate */ + for (cpu = 0; cpu < len; ++cpu) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Reverse iterate */ + for (cpu = len - 1; cpu >= 0; --cpu) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Even cores */ + for (cpu = 0; cpu < len; cpu += 2) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + /* Odd cores */ + for (cpu = 1; cpu < len; cpu += 2) { + send_from_cpu(cpu, family, proto); + receive_on_cpu(rcv_fd, len, epfd, cpu, proto); + } + + close(epfd); + for (cpu = 0; cpu < len; ++cpu) + close(rcv_fd[cpu]); +} + +int main(void) +{ + int *rcv_fd, cpus; + + cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (cpus <= 0) + error(1, errno, "failed counting cpus"); + + rcv_fd = calloc(cpus, sizeof(int)); + if (!rcv_fd) + error(1, 0, "failed to allocate array"); + + fprintf(stderr, "---- IPv4 UDP ----\n"); + test(rcv_fd, cpus, AF_INET, SOCK_DGRAM); + + fprintf(stderr, "---- IPv6 UDP ----\n"); + test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM); + + fprintf(stderr, "---- IPv4 TCP ----\n"); + test(rcv_fd, cpus, AF_INET, SOCK_STREAM); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test(rcv_fd, cpus, AF_INET6, SOCK_STREAM); + + free(rcv_fd); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c new file mode 100644 index 000000000000..90958aaaafb9 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -0,0 +1,208 @@ +/* + * It is possible to use SO_REUSEPORT to open multiple sockets bound to + * equivalent local addresses using AF_INET and AF_INET6 at the same time. If + * the AF_INET6 socket has IPV6_V6ONLY set, it's clear which socket should + * receive a given incoming packet. However, when it is not set, incoming v4 + * packets should prefer the AF_INET socket(s). This behavior was defined with + * the original SO_REUSEPORT implementation, but broke with + * e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") + * This test creates these mixed AF_INET/AF_INET6 sockets and asserts the + * AF_INET preference for v4 packets. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const int PORT = 8888; + +static void build_rcv_fd(int family, int proto, int *rcv_fds, int count) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + int opt, i; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < count; ++i) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fds[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void send_from_v4(int proto) +{ + struct sockaddr_in saddr, daddr; + int fd; + + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = htonl(INADDR_ANY); + saddr.sin_port = 0; + + daddr.sin_family = AF_INET; + daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr.sin_port = htons(PORT); + + fd = socket(AF_INET, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static int receive_once(int epfd, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + return ev.data.fd; +} + +static void test(int *rcv_fds, int count, int proto) +{ + struct epoll_event ev; + int epfd, i, test_fd; + uint16_t test_family; + socklen_t len; + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + + ev.events = EPOLLIN; + for (i = 0; i < count; ++i) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + send_from_v4(proto); + + test_fd = receive_once(epfd, proto); + if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) + error(1, errno, "failed to read socket domain"); + if (test_family != AF_INET) + error(1, 0, "expected to receive on v4 socket but got v6 (%d)", + test_family); + + close(epfd); +} + +int main(void) +{ + int rcv_fds[32], i; + + fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5); + build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_DGRAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv6 created before IPv4 ----\n"); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 5); + build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_DGRAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + /* NOTE: UDP socket lookups traverse a different code path when there + * are > 10 sockets in a group. + */ + fprintf(stderr, "---- UDP IPv4 created before IPv6 (large) ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 16); + build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[16]), 16); + test(rcv_fds, 32, SOCK_DGRAM); + for (i = 0; i < 32; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv6 created before IPv4 (large) ----\n"); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 16); + build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[16]), 16); + test(rcv_fds, 32, SOCK_DGRAM); + for (i = 0; i < 32; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv4 created before IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 5); + build_rcv_fd(AF_INET6, SOCK_STREAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_STREAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv6 created before IPv4 ----\n"); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds, 5); + build_rcv_fd(AF_INET, SOCK_STREAM, &(rcv_fds[5]), 5); + test(rcv_fds, 10, SOCK_STREAM); + for (i = 0; i < 10; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 0c2706bda330..4ca83fe80654 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,12 +8,13 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) +CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) export CFLAGS SUB_DIRS = benchmarks \ copyloops \ + context_switch \ dscr \ mm \ pmu \ @@ -22,7 +23,8 @@ SUB_DIRS = benchmarks \ switch_endian \ syscalls \ tm \ - vphn + vphn \ + math endif diff --git a/tools/testing/selftests/powerpc/basic_asm.h b/tools/testing/selftests/powerpc/basic_asm.h new file mode 100644 index 000000000000..3349a0704d1a --- /dev/null +++ b/tools/testing/selftests/powerpc/basic_asm.h @@ -0,0 +1,70 @@ +#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H +#define _SELFTESTS_POWERPC_BASIC_ASM_H + +#include <ppc-asm.h> +#include <asm/unistd.h> + +#define LOAD_REG_IMMEDIATE(reg,expr) \ + lis reg,(expr)@highest; \ + ori reg,reg,(expr)@higher; \ + rldicr reg,reg,32,31; \ + oris reg,reg,(expr)@high; \ + ori reg,reg,(expr)@l; + +/* + * Note: These macros assume that variables being stored on the stack are + * doublewords, while this is usually the case it may not always be the + * case for each use case. + */ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STACK_FRAME_MIN_SIZE 32 +#define STACK_FRAME_TOC_POS 24 +#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8)) +#define __STACK_FRAME_LOCAL(_num_params,_var_num) ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8)) +#else +#define STACK_FRAME_MIN_SIZE 112 +#define STACK_FRAME_TOC_POS 40 +#define __STACK_FRAME_PARAM(i) (48 + ((i)*8)) + +/* + * Caveat: if a function passed more than 8 doublewords, the caller will have + * made more space... which would render the 112 incorrect. + */ +#define __STACK_FRAME_LOCAL(_num_params,_var_num) (112 + ((_var_num)*8)) +#endif + +/* Parameter x saved to the stack */ +#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var) + +/* Local variable x saved to the stack after x parameters */ +#define STACK_FRAME_LOCAL(num_params,var) __STACK_FRAME_LOCAL(num_params,var) +#define STACK_FRAME_LR_POS 16 +#define STACK_FRAME_CR_POS 8 + +/* + * It is very important to note here that _extra is the extra amount of + * stack space needed. This space can be accessed using STACK_FRAME_PARAM() + * or STACK_FRAME_LOCAL() macros. + * + * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp + * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence + * %1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets + * preprocessed incorrectly, hence r0. + */ +#define PUSH_BASIC_STACK(_extra) \ + mflr r0; \ + std r0,STACK_FRAME_LR_POS(%r1); \ + stdu %r1,-(_extra + STACK_FRAME_MIN_SIZE)(%r1); \ + mfcr r0; \ + stw r0,STACK_FRAME_CR_POS(%r1); \ + std %r2,STACK_FRAME_TOC_POS(%r1); + +#define POP_BASIC_STACK(_extra) \ + ld %r2,STACK_FRAME_TOC_POS(%r1); \ + lwz r0,STACK_FRAME_CR_POS(%r1); \ + mtcr r0; \ + addi %r1,%r1,(_extra + STACK_FRAME_MIN_SIZE); \ + ld r0,STACK_FRAME_LR_POS(%r1); \ + mtlr r0; + +#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */ diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore b/tools/testing/selftests/powerpc/context_switch/.gitignore new file mode 100644 index 000000000000..c1431af7b51c --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/.gitignore @@ -0,0 +1 @@ +cp_abort diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile new file mode 100644 index 000000000000..e164d1466466 --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := cp_abort + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c ../utils.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c b/tools/testing/selftests/powerpc/context_switch/cp_abort.c new file mode 100644 index 000000000000..5a5b55afda0e --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/cp_abort.c @@ -0,0 +1,110 @@ +/* + * Adapted from Anton Blanchard's context switch microbenchmark. + * + * Copyright 2009, Anton Blanchard, IBM Corporation. + * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This program tests the copy paste abort functionality of a P9 + * (or later) by setting up two processes on the same CPU, one + * which executes the copy instruction and the other which + * executes paste. + * + * The paste instruction should never succeed, as the cp_abort + * instruction is called by the kernel during a context switch. + * + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include "utils.h" +#include <sched.h> + +#define READ_FD 0 +#define WRITE_FD 1 + +#define NUM_LOOPS 1000 + +/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */ +#define PASTE(RA, RB, L, RC) \ + .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31)) + +int paste(void *i) +{ + int cr; + + asm volatile(str(PASTE(0, %1, 1, 1))";" + "mfcr %0;" + : "=r" (cr) + : "b" (i) + : "memory" + ); + return cr; +} + +/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */ +#define COPY(RA, RB, L) \ + .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10)) + +void copy(void *i) +{ + asm volatile(str(COPY(0, %0, 1))";" + : + : "b" (i) + : "memory" + ); +} + +int test_cp_abort(void) +{ + /* 128 bytes for a full cache line */ + char buf[128] __cacheline_aligned; + cpu_set_t cpuset; + int fd1[2], fd2[2], pid; + char c; + + /* only run this test on a P9 or later */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* + * Run both processes on the same CPU, so that copy is more likely + * to leak into a paste. + */ + CPU_ZERO(&cpuset); + CPU_SET(pick_online_cpu(), &cpuset); + FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset)); + + FAIL_IF(pipe(fd1) || pipe(fd2)); + + pid = fork(); + FAIL_IF(pid < 0); + + if (!pid) { + for (int i = 0; i < NUM_LOOPS; i++) { + FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1); + FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1); + /* A paste succeeds if CR0 EQ bit is set */ + FAIL_IF(paste(buf) & 0x20000000); + } + } else { + for (int i = 0; i < NUM_LOOPS; i++) { + FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1); + copy(buf); + FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1)); + } + } + return 0; + +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_cp_abort, "cp_abort"); +} diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore new file mode 100644 index 000000000000..4fe13a439fd7 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -0,0 +1,6 @@ +fpu_syscall +vmx_syscall +fpu_preempt +vmx_preempt +fpu_signal +vmx_signal diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile new file mode 100644 index 000000000000..5b88875d5955 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -0,0 +1,19 @@ +TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c +$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec + +fpu_syscall: fpu_asm.S +fpu_preempt: fpu_asm.S +fpu_signal: fpu_asm.S + +vmx_syscall: vmx_asm.S +vmx_preempt: vmx_asm.S +vmx_signal: vmx_asm.S + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S new file mode 100644 index 000000000000..f3711d80e709 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -0,0 +1,198 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "../basic_asm.h" + +#define PUSH_FPU(pos) \ + stfd f14,pos(sp); \ + stfd f15,pos+8(sp); \ + stfd f16,pos+16(sp); \ + stfd f17,pos+24(sp); \ + stfd f18,pos+32(sp); \ + stfd f19,pos+40(sp); \ + stfd f20,pos+48(sp); \ + stfd f21,pos+56(sp); \ + stfd f22,pos+64(sp); \ + stfd f23,pos+72(sp); \ + stfd f24,pos+80(sp); \ + stfd f25,pos+88(sp); \ + stfd f26,pos+96(sp); \ + stfd f27,pos+104(sp); \ + stfd f28,pos+112(sp); \ + stfd f29,pos+120(sp); \ + stfd f30,pos+128(sp); \ + stfd f31,pos+136(sp); + +#define POP_FPU(pos) \ + lfd f14,pos(sp); \ + lfd f15,pos+8(sp); \ + lfd f16,pos+16(sp); \ + lfd f17,pos+24(sp); \ + lfd f18,pos+32(sp); \ + lfd f19,pos+40(sp); \ + lfd f20,pos+48(sp); \ + lfd f21,pos+56(sp); \ + lfd f22,pos+64(sp); \ + lfd f23,pos+72(sp); \ + lfd f24,pos+80(sp); \ + lfd f25,pos+88(sp); \ + lfd f26,pos+96(sp); \ + lfd f27,pos+104(sp); \ + lfd f28,pos+112(sp); \ + lfd f29,pos+120(sp); \ + lfd f30,pos+128(sp); \ + lfd f31,pos+136(sp); + +# Careful calling this, it will 'clobber' fpu (by design) +# Don't call this from C +FUNC_START(load_fpu) + lfd f14,0(r3) + lfd f15,8(r3) + lfd f16,16(r3) + lfd f17,24(r3) + lfd f18,32(r3) + lfd f19,40(r3) + lfd f20,48(r3) + lfd f21,56(r3) + lfd f22,64(r3) + lfd f23,72(r3) + lfd f24,80(r3) + lfd f25,88(r3) + lfd f26,96(r3) + lfd f27,104(r3) + lfd f28,112(r3) + lfd f29,120(r3) + lfd f30,128(r3) + lfd f31,136(r3) + blr +FUNC_END(load_fpu) + +FUNC_START(check_fpu) + mr r4,r3 + li r3,1 # assume a bad result + lfd f0,0(r4) + fcmpu cr1,f0,f14 + bne cr1,1f + lfd f0,8(r4) + fcmpu cr1,f0,f15 + bne cr1,1f + lfd f0,16(r4) + fcmpu cr1,f0,f16 + bne cr1,1f + lfd f0,24(r4) + fcmpu cr1,f0,f17 + bne cr1,1f + lfd f0,32(r4) + fcmpu cr1,f0,f18 + bne cr1,1f + lfd f0,40(r4) + fcmpu cr1,f0,f19 + bne cr1,1f + lfd f0,48(r4) + fcmpu cr1,f0,f20 + bne cr1,1f + lfd f0,56(r4) + fcmpu cr1,f0,f21 + bne cr1,1f + lfd f0,64(r4) + fcmpu cr1,f0,f22 + bne cr1,1f + lfd f0,72(r4) + fcmpu cr1,f0,f23 + bne cr1,1f + lfd f0,80(r4) + fcmpu cr1,f0,f24 + bne cr1,1f + lfd f0,88(r4) + fcmpu cr1,f0,f25 + bne cr1,1f + lfd f0,96(r4) + fcmpu cr1,f0,f26 + bne cr1,1f + lfd f0,104(r4) + fcmpu cr1,f0,f27 + bne cr1,1f + lfd f0,112(r4) + fcmpu cr1,f0,f28 + bne cr1,1f + lfd f0,120(r4) + fcmpu cr1,f0,f29 + bne cr1,1f + lfd f0,128(r4) + fcmpu cr1,f0,f30 + bne cr1,1f + lfd f0,136(r4) + fcmpu cr1,f0,f31 + bne cr1,1f + li r3,0 # Success!!! +1: blr + +FUNC_START(test_fpu) + # r3 holds pointer to where to put the result of fork + # r4 holds pointer to the pid + # f14-f31 are non volatiles + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray + std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid + PUSH_FPU(STACK_FRAME_LOCAL(2,0)) + + bl load_fpu + nop + li r0,__NR_fork + sc + + # pass the result of the fork to the caller + ld r9,STACK_FRAME_PARAM(1)(sp) + std r3,0(r9) + + ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + + POP_FPU(STACK_FRAME_LOCAL(2,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(test_fpu) + +# int preempt_fpu(double *darray, int *threads_running, int *running) +# On starting will (atomically) decrement not_ready as a signal that the FPU +# has been loaded with darray. Will proceed to check the validity of the FPU +# registers while running is not zero. +FUNC_START(preempt_fpu) + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # double *darray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + PUSH_FPU(STACK_FRAME_LOCAL(3,0)) + + bl load_fpu + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_FPU(STACK_FRAME_LOCAL(3,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(preempt_fpu) diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c new file mode 100644 index 000000000000..0f85b79d883d --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -0,0 +1,113 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers change across preemption. + * Two things should be noted here a) The check_fpu function in asm only checks + * the non volatile registers as it is reused from the syscall test b) There is + * no way to be sure preemption happened so this test just uses many threads + * and a long wait. As such, a successful test doesn't mean much but a failure + * is bad. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Time to wait for workers to get preempted (seconds) */ +#define PREEMPT_TIME 20 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + + +__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +int threads_starting; +int running; + +extern void preempt_fpu(double *darray, int *threads_starting, int *running); + +void *preempt_fpu_c(void *p) +{ + int i; + srand(pthread_self()); + for (i = 0; i < 21; i++) + darray[i] = rand(); + + /* Test failed if it ever returns */ + preempt_fpu(darray, &threads_starting, &running); + + return p; +} + +int test_preempt_fpu(void) +{ + int i, rc, threads; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc((threads) * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + /* Not really necessary but nice to wait for every thread to start */ + printf("\tWaiting for all workers to start..."); + while(threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME); + sleep(PREEMPT_TIME); + printf("done\n"); + + printf("\tStopping workers..."); + /* + * Working are checking this value every loop. In preempt_fpu 'cmpwi r5,0; bne 2b'. + * r5 will have loaded the value of running. + */ + running = 0; + for (i = 0; i < threads; i++) { + void *rc_p; + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why preempt_fpu + * returned + */ + if ((long) rc_p) + printf("oops\n"); + FAIL_IF((long) rc_p); + } + printf("done\n"); + + free(tids); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_preempt_fpu, "fpu_preempt"); +} diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c new file mode 100644 index 000000000000..888aa51b4204 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_signal.c @@ -0,0 +1,135 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers are correctly reported in a + * signal context. Each worker just spins checking its FPU registers, at some + * point a signal will interrupt it and C code will check the signal context + * ensuring it is also the same. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Number of times each thread should receive the signal */ +#define ITERATIONS 10 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + +__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +bool bad_context; +int threads_starting; +int running; + +extern long preempt_fpu(double *darray, int *threads_starting, int *running); + +void signal_fpu_sig(int sig, siginfo_t *info, void *context) +{ + int i; + ucontext_t *uc = context; + mcontext_t *mc = &uc->uc_mcontext; + + /* Only the non volatiles were loaded up */ + for (i = 14; i < 32; i++) { + if (mc->fp_regs[i] != darray[i - 14]) { + bad_context = true; + break; + } + } +} + +void *signal_fpu_c(void *p) +{ + int i; + long rc; + struct sigaction act; + act.sa_sigaction = signal_fpu_sig; + act.sa_flags = SA_SIGINFO; + rc = sigaction(SIGUSR1, &act, NULL); + if (rc) + return p; + + srand(pthread_self()); + for (i = 0; i < 21; i++) + darray[i] = rand(); + + rc = preempt_fpu(darray, &threads_starting, &running); + + return (void *) rc; +} + +int test_signal_fpu(void) +{ + int i, j, rc, threads; + void *rc_p; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc(threads * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + printf("\tWaiting for all workers to start..."); + while (threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tSending signals to all threads %d times...", ITERATIONS); + for (i = 0; i < ITERATIONS; i++) { + for (j = 0; j < threads; j++) { + pthread_kill(tids[j], SIGUSR1); + } + sleep(1); + } + printf("done\n"); + + printf("\tStopping workers..."); + running = 0; + for (i = 0; i < threads; i++) { + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why signal_fpu + * returned + */ + if ((long) rc_p || bad_context) + printf("oops\n"); + if (bad_context) + fprintf(stderr, "\t!! bad_context is true\n"); + FAIL_IF((long) rc_p || bad_context); + } + printf("done\n"); + + free(tids); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_signal_fpu, "fpu_signal"); +} diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c new file mode 100644 index 000000000000..949e6721256d --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c @@ -0,0 +1,90 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers change across a syscall (fork). + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> + +#include "utils.h" + +extern int test_fpu(double *darray, pid_t *pid); + +double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +int syscall_fpu(void) +{ + pid_t fork_pid; + int i; + int ret; + int child_ret; + for (i = 0; i < 1000; i++) { + /* test_fpu will fork() */ + ret = test_fpu(darray, &fork_pid); + if (fork_pid == -1) + return -1; + if (fork_pid == 0) + exit(ret); + waitpid(fork_pid, &child_ret, 0); + if (ret || child_ret) + return 1; + } + + return 0; +} + +int test_syscall_fpu(void) +{ + /* + * Setup an environment with much context switching + */ + pid_t pid2; + pid_t pid = fork(); + int ret; + int child_ret; + FAIL_IF(pid == -1); + + pid2 = fork(); + /* Can't FAIL_IF(pid2 == -1); because already forked once */ + if (pid2 == -1) { + /* + * Couldn't fork, ensure test is a fail + */ + child_ret = ret = 1; + } else { + ret = syscall_fpu(); + if (pid2) + waitpid(pid2, &child_ret, 0); + else + exit(ret); + } + + ret |= child_ret; + + if (pid) + waitpid(pid, &child_ret, 0); + else + exit(ret); + + FAIL_IF(ret || child_ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_syscall_fpu, "syscall_fpu"); + +} diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S new file mode 100644 index 000000000000..1b8c248b3ac1 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_asm.S @@ -0,0 +1,235 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "../basic_asm.h" + +# POS MUST BE 16 ALIGNED! +#define PUSH_VMX(pos,reg) \ + li reg,pos; \ + stvx v20,reg,sp; \ + addi reg,reg,16; \ + stvx v21,reg,sp; \ + addi reg,reg,16; \ + stvx v22,reg,sp; \ + addi reg,reg,16; \ + stvx v23,reg,sp; \ + addi reg,reg,16; \ + stvx v24,reg,sp; \ + addi reg,reg,16; \ + stvx v25,reg,sp; \ + addi reg,reg,16; \ + stvx v26,reg,sp; \ + addi reg,reg,16; \ + stvx v27,reg,sp; \ + addi reg,reg,16; \ + stvx v28,reg,sp; \ + addi reg,reg,16; \ + stvx v29,reg,sp; \ + addi reg,reg,16; \ + stvx v30,reg,sp; \ + addi reg,reg,16; \ + stvx v31,reg,sp; + +# POS MUST BE 16 ALIGNED! +#define POP_VMX(pos,reg) \ + li reg,pos; \ + lvx v20,reg,sp; \ + addi reg,reg,16; \ + lvx v21,reg,sp; \ + addi reg,reg,16; \ + lvx v22,reg,sp; \ + addi reg,reg,16; \ + lvx v23,reg,sp; \ + addi reg,reg,16; \ + lvx v24,reg,sp; \ + addi reg,reg,16; \ + lvx v25,reg,sp; \ + addi reg,reg,16; \ + lvx v26,reg,sp; \ + addi reg,reg,16; \ + lvx v27,reg,sp; \ + addi reg,reg,16; \ + lvx v28,reg,sp; \ + addi reg,reg,16; \ + lvx v29,reg,sp; \ + addi reg,reg,16; \ + lvx v30,reg,sp; \ + addi reg,reg,16; \ + lvx v31,reg,sp; + +# Carefull this will 'clobber' vmx (by design) +# Don't call this from C +FUNC_START(load_vmx) + li r5,0 + lvx v20,r5,r3 + addi r5,r5,16 + lvx v21,r5,r3 + addi r5,r5,16 + lvx v22,r5,r3 + addi r5,r5,16 + lvx v23,r5,r3 + addi r5,r5,16 + lvx v24,r5,r3 + addi r5,r5,16 + lvx v25,r5,r3 + addi r5,r5,16 + lvx v26,r5,r3 + addi r5,r5,16 + lvx v27,r5,r3 + addi r5,r5,16 + lvx v28,r5,r3 + addi r5,r5,16 + lvx v29,r5,r3 + addi r5,r5,16 + lvx v30,r5,r3 + addi r5,r5,16 + lvx v31,r5,r3 + blr +FUNC_END(load_vmx) + +# Should be safe from C, only touches r4, r5 and v0,v1,v2 +FUNC_START(check_vmx) + PUSH_BASIC_STACK(32) + mr r4,r3 + li r3,1 # assume a bad result + li r5,0 + lvx v0,r5,r4 + vcmpequd. v1,v0,v20 + vmr v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v21 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v22 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v23 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v24 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v25 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v26 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v27 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v28 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v29 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v30 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v31 + vand v2,v2,v1 + + li r5,STACK_FRAME_LOCAL(0,0) + stvx v2,r5,sp + ldx r0,r5,sp + cmpdi r0,0xffffffffffffffff + bne 1f + li r3,0 +1: POP_BASIC_STACK(32) + blr +FUNC_END(check_vmx) + +# Safe from C +FUNC_START(test_vmx) + # r3 holds pointer to where to put the result of fork + # r4 holds pointer to the pid + # v20-v31 are non-volatile + PUSH_BASIC_STACK(512) + std r3,STACK_FRAME_PARAM(0)(sp) # Address of varray + std r4,STACK_FRAME_PARAM(1)(sp) # address of pid + PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4) + + bl load_vmx + nop + + li r0,__NR_fork + sc + # Pass the result of fork back to the caller + ld r9,STACK_FRAME_PARAM(1)(sp) + std r3,0(r9) + + ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_vmx + nop + + POP_VMX(STACK_FRAME_LOCAL(2,0),r4) + POP_BASIC_STACK(512) + blr +FUNC_END(test_vmx) + +# int preempt_vmx(vector int *varray, int *threads_starting, int *running) +# On starting will (atomically) decrement threads_starting as a signal that +# the VMX have been loaded with varray. Will proceed to check the validity of +# the VMX registers while running is not zero. +FUNC_START(preempt_vmx) + PUSH_BASIC_STACK(512) + std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + # VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0) + PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4) + + bl load_vmx + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_vmx + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_VMX(STACK_FRAME_LOCAL(4,0),r4) + POP_BASIC_STACK(512) + blr +FUNC_END(preempt_vmx) diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c new file mode 100644 index 000000000000..9ef376c55b13 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -0,0 +1,112 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VMX registers change across preemption. + * Two things should be noted here a) The check_vmx function in asm only checks + * the non volatile registers as it is reused from the syscall test b) There is + * no way to be sure preemption happened so this test just uses many threads + * and a long wait. As such, a successful test doesn't mean much but a failure + * is bad. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Time to wait for workers to get preempted (seconds) */ +#define PREEMPT_TIME 20 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + +__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}}; + +int threads_starting; +int running; + +extern void preempt_vmx(vector int *varray, int *threads_starting, int *running); + +void *preempt_vmx_c(void *p) +{ + int i, j; + srand(pthread_self()); + for (i = 0; i < 12; i++) + for (j = 0; j < 4; j++) + varray[i][j] = rand(); + + /* Test fails if it ever returns */ + preempt_vmx(varray, &threads_starting, &running); + return p; +} + +int test_preempt_vmx(void) +{ + int i, rc, threads; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc(threads * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + /* Not really nessesary but nice to wait for every thread to start */ + printf("\tWaiting for all workers to start..."); + while(threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME); + sleep(PREEMPT_TIME); + printf("done\n"); + + printf("\tStopping workers..."); + /* + * Working are checking this value every loop. In preempt_vmx 'cmpwi r5,0; bne 2b'. + * r5 will have loaded the value of running. + */ + running = 0; + for (i = 0; i < threads; i++) { + void *rc_p; + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why preempt_vmx + * returned + */ + if ((long) rc_p) + printf("oops\n"); + FAIL_IF((long) rc_p); + } + printf("done\n"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_preempt_vmx, "vmx_preempt"); +} diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c new file mode 100644 index 000000000000..671d7533a557 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_signal.c @@ -0,0 +1,156 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VMX registers are correctly reported in a + * signal context. Each worker just spins checking its VMX registers, at some + * point a signal will interrupt it and C code will check the signal context + * ensuring it is also the same. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> +#include <altivec.h> + +#include "utils.h" + +/* Number of times each thread should receive the signal */ +#define ITERATIONS 10 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + +__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}}; + +bool bad_context; +int running; +int threads_starting; + +extern int preempt_vmx(vector int *varray, int *threads_starting, int *sentinal); + +void signal_vmx_sig(int sig, siginfo_t *info, void *context) +{ + int i; + ucontext_t *uc = context; + mcontext_t *mc = &uc->uc_mcontext; + + /* Only the non volatiles were loaded up */ + for (i = 20; i < 32; i++) { + if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) { + int j; + /* + * Shouldn't printf() in a signal handler, however, this is a + * test and we've detected failure. Understanding what failed + * is paramount. All that happens after this is tests exit with + * failure. + */ + printf("VMX mismatch at reg %d!\n", i); + printf("Reg | Actual | Expected\n"); + for (j = 20; j < 32; j++) { + printf("%d | 0x%04x%04x%04x%04x | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0], + mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3], + varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]); + } + bad_context = true; + break; + } + } +} + +void *signal_vmx_c(void *p) +{ + int i, j; + long rc; + struct sigaction act; + act.sa_sigaction = signal_vmx_sig; + act.sa_flags = SA_SIGINFO; + rc = sigaction(SIGUSR1, &act, NULL); + if (rc) + return p; + + srand(pthread_self()); + for (i = 0; i < 12; i++) + for (j = 0; j < 4; j++) + varray[i][j] = rand(); + + rc = preempt_vmx(varray, &threads_starting, &running); + + return (void *) rc; +} + +int test_signal_vmx(void) +{ + int i, j, rc, threads; + void *rc_p; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc(threads * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + printf("\tWaiting for %d workers to start... %d", threads, threads_starting); + while (threads_starting) { + asm volatile("": : :"memory"); + usleep(1000); + printf(", %d", threads_starting); + } + printf(" ...done\n"); + + printf("\tSending signals to all threads %d times...", ITERATIONS); + for (i = 0; i < ITERATIONS; i++) { + for (j = 0; j < threads; j++) { + pthread_kill(tids[j], SIGUSR1); + } + sleep(1); + } + printf("done\n"); + + printf("\tKilling workers..."); + running = 0; + for (i = 0; i < threads; i++) { + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why signal_vmx + * returned + */ + if ((long) rc_p || bad_context) + printf("oops\n"); + if (bad_context) + fprintf(stderr, "\t!! bad_context is true\n"); + FAIL_IF((long) rc_p || bad_context); + } + printf("done\n"); + + free(tids); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_signal_vmx, "vmx_signal"); +} diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c new file mode 100644 index 000000000000..a017918ee1ca --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c @@ -0,0 +1,91 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VMX registers change across a syscall (fork). + */ + +#include <altivec.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "utils.h" + +vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}}; + +extern int test_vmx(vector int *varray, pid_t *pid); + +int vmx_syscall(void) +{ + pid_t fork_pid; + int i; + int ret; + int child_ret; + for (i = 0; i < 1000; i++) { + /* test_vmx will fork() */ + ret = test_vmx(varray, &fork_pid); + if (fork_pid == -1) + return -1; + if (fork_pid == 0) + exit(ret); + waitpid(fork_pid, &child_ret, 0); + if (ret || child_ret) + return 1; + } + + return 0; +} + +int test_vmx_syscall(void) +{ + /* + * Setup an environment with much context switching + */ + pid_t pid2; + pid_t pid = fork(); + int ret; + int child_ret; + FAIL_IF(pid == -1); + + pid2 = fork(); + ret = vmx_syscall(); + /* Can't FAIL_IF(pid2 == -1); because we've already forked */ + if (pid2 == -1) { + /* + * Couldn't fork, ensure child_ret is set and is a fail + */ + ret = child_ret = 1; + } else { + if (pid2) + waitpid(pid2, &child_ret, 0); + else + exit(ret); + } + + ret |= child_ret; + + if (pid) + waitpid(pid, &child_ret, 0); + else + exit(ret); + + FAIL_IF(ret || child_ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_vmx_syscall, "vmx_syscall"); + +} diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 440180ff8089..35ade7406dcd 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -73,7 +73,7 @@ static inline void check_faulted(void *addr, long page, long subpage, int write) want_fault |= (subpage == ((page + 1) % 16)); if (faulted != want_fault) { - printf("Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", + printf("Failed at %p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", addr, page, subpage, write, want_fault ? "fault" : "pass", faulted ? "fault" : "pass"); @@ -82,7 +82,7 @@ static inline void check_faulted(void *addr, long page, long subpage, int write) if (faulted) { if (dar != addr) { - printf("Fault expected at 0x%p and happened at 0x%p !\n", + printf("Fault expected at %p and happened at %p !\n", addr, dar); } faulted = 0; @@ -162,7 +162,7 @@ int test_anon(void) mallocblock = (void *)align; - printf("allocated malloc block of 0x%lx bytes at 0x%p\n", + printf("allocated malloc block of 0x%lx bytes at %p\n", mallocsize, mallocblock); printf("testing malloc block...\n"); @@ -197,7 +197,7 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at 0x%p\n", + printf("allocated %s for 0x%lx bytes at %p\n", file_name, filesize, fileblock); printf("testing file map...\n"); @@ -207,14 +207,16 @@ int test_file(void) int main(int argc, char *argv[]) { - test_harness(test_anon, "subpage_prot_anon"); + int rc; + + rc = test_harness(test_anon, "subpage_prot_anon"); + if (rc) + return rc; if (argc > 1) file_name = argv[1]; else file_name = "tempfile"; - test_harness(test_file, "subpage_prot_file"); - - return 0; + return test_harness(test_file, "subpage_prot_file"); } diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index e67452f1bcff..46681fec549b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -15,7 +15,6 @@ #include <sys/ioctl.h> #include "trace.h" -#include "reg.h" #include "ebb.h" diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c index 5b1188f10c15..f923228bca22 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c @@ -7,7 +7,6 @@ #include <stdlib.h> #include "ebb.h" -#include "reg.h" /* diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg.h b/tools/testing/selftests/powerpc/reg.h index 5921b0dfe2e9..65bfdeeebdee 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/reg.h +++ b/tools/testing/selftests/powerpc/reg.h @@ -9,12 +9,12 @@ #define __stringify_1(x) #x #define __stringify(x) __stringify_1(x) -#define mfspr(rn) ({unsigned long rval; \ - asm volatile("mfspr %0," __stringify(rn) \ - : "=r" (rval)); rval; }) -#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ - : "r" ((unsigned long)(v)) \ - : "memory") +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," _str(rn) \ + : "=r" (rval)); rval; }) +#define mtspr(rn, v) asm volatile("mtspr " _str(rn) ",%0" : \ + : "r" ((unsigned long)(v)) \ + : "memory") #define mb() asm volatile("sync" : : : "memory"); @@ -46,4 +46,10 @@ #define SPRN_SDAR 781 #define SPRN_SIER 768 +#define SPRN_TEXASR 0x82 +#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ +#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ +#define TEXASR_FS 0x08000000 +#define SPRN_TAR 0x32f + #endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 7d0f14b8cb2e..bb942db845bf 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -3,3 +3,6 @@ tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy +tm-fork +tm-tar +tm-tmspr diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 737f72c964e6..d0505dbd22d5 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar tm-tmspr all: $(TEST_PROGS) @@ -6,6 +6,7 @@ $(TEST_PROGS): ../harness.c ../utils.c tm-syscall: tm-syscall-asm.S tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include +tm-tmspr: CFLAGS += -pthread include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c new file mode 100644 index 000000000000..8d48579b7778 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-fork.c @@ -0,0 +1,42 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Edited: Rashmica Gupta, Nov 2015 + * + * This test does a fork syscall inside a transaction. Basic sniff test + * to see if we can enter the kernel during a transaction. + */ + +#include <errno.h> +#include <inttypes.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "utils.h" +#include "tm.h" + +int test_fork(void) +{ + SKIP_IF(!have_htm()); + + asm __volatile__( + "tbegin.;" + "blt 1f; " + "li 0, 2;" /* fork syscall */ + "sc ;" + "tend.;" + "1: ;" + : : : "memory", "r0"); + /* If we reach here, we've passed. Otherwise we've probably crashed + * the kernel */ + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_fork, "tm_fork"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index 8fde93d6021f..d9c49f41515e 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -31,12 +31,6 @@ #include "utils.h" #include "tm.h" -#define TBEGIN ".long 0x7C00051D ;" -#define TEND ".long 0x7C00055D ;" -#define TCHECK ".long 0x7C00059C ;" -#define TSUSPEND ".long 0x7C0005DD ;" -#define TRESUME ".long 0x7C2005DD ;" -#define SPRN_TEXASR 0x82 #define SPRN_DSCR 0x03 int test_body(void) @@ -55,13 +49,13 @@ int test_body(void) "mtspr %[sprn_dscr], 3;" /* start and suspend a transaction */ - TBEGIN + "tbegin.;" "beq 1f;" - TSUSPEND + "tsuspend.;" /* hard loop until the transaction becomes doomed */ "2: ;" - TCHECK + "tcheck 0;" "bc 4, 0, 2b;" /* record DSCR and TEXASR */ @@ -70,8 +64,8 @@ int test_body(void) "mfspr 3, %[sprn_texasr];" "std 3, %[texasr];" - TRESUME - TEND + "tresume.;" + "tend.;" "li %[rv], 0;" "1: ;" : [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c index d86653f282b1..8c54d18b3e9a 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c @@ -40,7 +40,7 @@ void signal_usr1(int signum, siginfo_t *info, void *uc) #ifdef __powerpc64__ ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32); #else - ucp->uc_mcontext.regs->gpr[PT_MSR] |= (7ULL); + ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL); #endif /* Should segv on return becuase of invalid context */ segv_expected = 1; diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c index e44a238c1d77..1f0eb567438d 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c @@ -60,9 +60,9 @@ int tm_signal_stack() exit(1); asm volatile("li 1, 0 ;" /* stack ptr == NULL */ "1:" - ".long 0x7C00051D ;" /* tbegin */ + "tbegin.;" "beq 1b ;" /* retry forever */ - ".long 0x7C0005DD ; ;" /* tsuspend */ + "tsuspend.;" "ld 2, 0(1) ;" /* trigger segv" */ : : : "memory"); diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c new file mode 100644 index 000000000000..2d2fcc2b7a60 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-tar.c @@ -0,0 +1,90 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * Original: Michael Neuling 19/7/2013 + * Edited: Rashmica Gupta 01/12/2015 + * + * Do some transactions, see if the tar is corrupted. + * If the transaction is aborted, the TAR should be rolled back to the + * checkpointed value before the transaction began. The value written to + * TAR in suspended mode should only remain in TAR if the transaction + * completes. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + +#include "tm.h" +#include "utils.h" + +int num_loops = 10000; + +int test_tar(void) +{ + int i; + + SKIP_IF(!have_htm()); + + for (i = 0; i < num_loops; i++) + { + uint64_t result = 0; + asm __volatile__( + "li 7, 1;" + "mtspr %[tar], 7;" /* tar = 1 */ + "tbegin.;" + "beq 3f;" + "li 4, 0x7000;" /* Loop lots, to use time */ + "2:;" /* Start loop */ + "li 7, 2;" + "mtspr %[tar], 7;" /* tar = 2 */ + "tsuspend.;" + "li 7, 3;" + "mtspr %[tar], 7;" /* tar = 3 */ + "tresume.;" + "subi 4, 4, 1;" + "cmpdi 4, 0;" + "bne 2b;" + "tend.;" + + /* Transaction sucess! TAR should be 3 */ + "mfspr 7, %[tar];" + "ori %[res], 7, 4;" // res = 3|4 = 7 + "b 4f;" + + /* Abort handler. TAR should be rolled back to 1 */ + "3:;" + "mfspr 7, %[tar];" + "ori %[res], 7, 8;" // res = 1|8 = 9 + "4:;" + + : [res]"=r"(result) + : [tar]"i"(SPRN_TAR) + : "memory", "r0", "r4", "r7"); + + /* If result is anything else other than 7 or 9, the tar + * value must have been corrupted. */ + if ((result != 7) && (result != 9)) + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + /* A low number of iterations (eg 100) can cause a false pass */ + if (argc > 1) { + if (strcmp(argv[1], "-h") == 0) { + printf("Syntax:\n\t%s [<num loops>]\n", + argv[0]); + return 1; + } else { + num_loops = atoi(argv[1]); + } + } + + printf("Starting, %d loops\n", num_loops); + + return test_harness(test_tar, "tm_tar"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c new file mode 100644 index 000000000000..2bda81c7bf23 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -0,0 +1,143 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Original: Michael Neuling 3/4/2014 + * Modified: Rashmica Gupta 8/12/2015 + * + * Check if any of the Transaction Memory SPRs get corrupted. + * - TFIAR - stores address of location of transaction failure + * - TFHAR - stores address of software failure handler (if transaction + * fails) + * - TEXASR - lots of info about the transacion(s) + * + * (1) create more threads than cpus + * (2) in each thread: + * (a) set TFIAR and TFHAR a unique value + * (b) loop for awhile, continually checking to see if + * either register has been corrupted. + * + * (3) Loop: + * (a) begin transaction + * (b) abort transaction + * (c) check TEXASR to see if FS has been corrupted + * + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <pthread.h> +#include <string.h> + +#include "utils.h" +#include "tm.h" + +int num_loops = 10000; +int passed = 1; + +void tfiar_tfhar(void *in) +{ + int i, cpu; + unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + cpu = (unsigned long)in >> 1; + CPU_SET(cpu, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); + + /* TFIAR: Last bit has to be high so userspace can read register */ + tfiar = ((unsigned long)in) + 1; + tfiar += 2; + mtspr(SPRN_TFIAR, tfiar); + + /* TFHAR: Last two bits are reserved */ + tfhar = ((unsigned long)in); + tfhar &= ~0x3UL; + tfhar += 4; + mtspr(SPRN_TFHAR, tfhar); + + for (i = 0; i < num_loops; i++) { + tfhar_rd = mfspr(SPRN_TFHAR); + tfiar_rd = mfspr(SPRN_TFIAR); + if ( (tfhar != tfhar_rd) || (tfiar != tfiar_rd) ) { + passed = 0; + return; + } + } + return; +} + +void texasr(void *in) +{ + unsigned long i; + uint64_t result = 0; + + for (i = 0; i < num_loops; i++) { + asm __volatile__( + "tbegin.;" + "beq 3f ;" + "tabort. 0 ;" + "tend.;" + + /* Abort handler */ + "3: ;" + ::: "memory"); + + /* Check the TEXASR */ + result = mfspr(SPRN_TEXASR); + if ((result & TEXASR_FS) == 0) { + passed = 0; + return; + } + } + return; +} + +int test_tmspr() +{ + pthread_t thread; + int thread_num; + unsigned long i; + + SKIP_IF(!have_htm()); + + /* To cause some context switching */ + thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN); + + /* Test TFIAR and TFHAR */ + for (i = 0 ; i < thread_num ; i += 2){ + if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i)) + return EXIT_FAILURE; + } + if (pthread_join(thread, NULL) != 0) + return EXIT_FAILURE; + + /* Test TEXASR */ + for (i = 0 ; i < thread_num ; i++){ + if (pthread_create(&thread, NULL, (void*)texasr, (void *)i)) + return EXIT_FAILURE; + } + if (pthread_join(thread, NULL) != 0) + return EXIT_FAILURE; + + if (passed) + return 0; + else + return 1; +} + +int main(int argc, char *argv[]) +{ + if (argc > 1) { + if (strcmp(argv[1], "-h") == 0) { + printf("Syntax:\t [<num loops>]\n"); + return 0; + } else { + num_loops = atoi(argv[1]); + } + } + return test_harness(test_tmspr, "tm_tmspr"); +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index 175ac6ad10dd..a985cfaa535e 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -6,9 +6,12 @@ #ifndef _SELFTESTS_POWERPC_UTILS_H #define _SELFTESTS_POWERPC_UTILS_H +#define __cacheline_aligned __attribute__((aligned(128))) + #include <stdint.h> #include <stdbool.h> #include <linux/auxvec.h> +#include "reg.h" /* Avoid headaches with PRI?64 - just use %ll? always */ typedef unsigned long long u64; @@ -54,4 +57,9 @@ do { \ #define _str(s) #s #define str(s) _str(s) +/* POWER9 feature */ +#ifndef PPC_FEATURE2_ARCH_3_00 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#endif + #endif /* _SELFTESTS_POWERPC_UTILS_H */ diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh new file mode 100755 index 000000000000..3633828375e3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# +# Alternate sleeping and spinning on randomly selected CPUs. The purpose +# of this script is to inflict random OS jitter on a concurrently running +# test. +# +# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ] +# +# me: Random-number-generator seed salt. +# duration: Time to run in seconds. +# sleepmax: Maximum microseconds to sleep, defaults to one second. +# spinmax: Maximum microseconds to spin, defaults to one millisecond. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2016 +# +# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + +me=$(($1 * 1000)) +duration=$2 +sleepmax=${3-1000000} +spinmax=${4-1000} + +n=1 + +starttime=`awk 'BEGIN { print systime(); }' < /dev/null` + +while : +do + # Check for done. + t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null` + if test "$t" -gt "$duration" + then + exit 0; + fi + + # Set affinity to randomly selected CPU + cpus=`ls /sys/devices/system/cpu/*/online | + sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' | + grep -v '^0*$'` + cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN { + srand(n + me + systime()); + ncpus = split(cpus, ca); + curcpu = ca[int(rand() * ncpus + 1)]; + mask = lshift(1, curcpu); + if (mask + 0 <= 0) + mask = 1; + printf("%#x\n", mask); + }' < /dev/null` + n=$(($n+1)) + if ! taskset -p $cpumask $$ > /dev/null 2>&1 + then + echo taskset failure: '"taskset -p ' $cpumask $$ '"' + exit 1 + fi + + # Sleep a random duration + sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN { + srand(n + me + systime()); + printf("%06d", int(rand() * sleepmax)); + }' < /dev/null` + n=$(($n+1)) + sleep .$sleeptime + + # Spin a random duration + limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN { + srand(n + me + systime()); + printf("%06d", int(rand() * spinmax)); + }' < /dev/null` + n=$(($n+1)) + for i in {1..$limit} + do + echo > /dev/null + done +done + +exit 1 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh new file mode 100755 index 000000000000..f79b0e9e84fc --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# +# Analyze a given results directory for rcuperf performance measurements, +# looking for ftrace data. Exits with 0 if data was found, analyzed, and +# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after +# argument checking. +# +# Usage: kvm-recheck-rcuperf-ftrace.sh resdir +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2016 +# +# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + +i="$1" +. tools/testing/selftests/rcutorture/bin/functions.sh + +if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100 +then + exit 10 +fi + +sed -e 's/^\[[^]]*]//' < $i/console.log | +grep 'us : rcu_exp_grace_period' | +sed -e 's/us : / : /' | +tr -d '\015' | +awk ' +$8 == "start" { + if (starttask != "") + nlost++; + starttask = $1; + starttime = $3; + startseq = $7; +} + +$8 == "end" { + if (starttask == $1 && startseq == $7) { + curgpdur = $3 - starttime; + gptimes[++n] = curgpdur; + gptaskcnt[starttask]++; + sum += curgpdur; + if (curgpdur > 1000) + print "Long GP " starttime "us to " $3 "us (" curgpdur "us)"; + starttask = ""; + } else { + # Lost a message or some such, reset. + starttask = ""; + nlost++; + } +} + +$8 == "done" { + piggybackcnt[$1]++; +} + +END { + newNR = asort(gptimes); + if (newNR <= 0) { + print "No ftrace records found???" + exit 10; + } + pct50 = int(newNR * 50 / 100); + if (pct50 < 1) + pct50 = 1; + pct90 = int(newNR * 90 / 100); + if (pct90 < 1) + pct90 = 1; + pct99 = int(newNR * 99 / 100); + if (pct99 < 1) + pct99 = 1; + div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100; + print "Histogram bucket size: " div; + last = gptimes[1] - 10; + count = 0; + for (i = 1; i <= newNR; i++) { + current = div * int(gptimes[i] / div); + if (last == current) { + count++; + } else { + if (count > 0) + print last, count; + count = 1; + last = current; + } + } + if (count > 0) + print last, count; + print "Distribution of grace periods across tasks:"; + for (i in gptaskcnt) { + print "\t" i, gptaskcnt[i]; + nbatches += gptaskcnt[i]; + } + ngps = nbatches; + print "Distribution of piggybacking across tasks:"; + for (i in piggybackcnt) { + print "\t" i, piggybackcnt[i]; + ngps += piggybackcnt[i]; + } + print "Average grace-period duration: " sum / newNR " microseconds"; + print "Minimum grace-period duration: " gptimes[1]; + print "50th percentile grace-period duration: " gptimes[pct50]; + print "90th percentile grace-period duration: " gptimes[pct90]; + print "99th percentile grace-period duration: " gptimes[pct99]; + print "Maximum grace-period duration: " gptimes[newNR]; + print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0; + print "Computed from ftrace data."; +}' +exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh new file mode 100755 index 000000000000..8f3121afc716 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# +# Analyze a given results directory for rcuperf performance measurements. +# +# Usage: kvm-recheck-rcuperf.sh resdir +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2016 +# +# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + +i="$1" +if test -d $i +then + : +else + echo Unreadable results directory: $i + exit 1 +fi +PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH +. tools/testing/selftests/rcutorture/bin/functions.sh + +if kvm-recheck-rcuperf-ftrace.sh $i +then + # ftrace data was successfully analyzed, call it good! + exit 0 +fi + +configfile=`echo $i | sed -e 's/^.*\///'` + +sed -e 's/^\[[^]]*]//' < $i/console.log | +awk ' +/-perf: .* gps: .* batches:/ { + ngps = $9; + nbatches = $11; +} + +/-perf: .*writer-duration/ { + gptimes[++n] = $5 / 1000.; + sum += $5 / 1000.; +} + +END { + newNR = asort(gptimes); + if (newNR <= 0) { + print "No rcuperf records found???" + exit; + } + pct50 = int(newNR * 50 / 100); + if (pct50 < 1) + pct50 = 1; + pct90 = int(newNR * 90 / 100); + if (pct90 < 1) + pct90 = 1; + pct99 = int(newNR * 99 / 100); + if (pct99 < 1) + pct99 = 1; + div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100; + print "Histogram bucket size: " div; + last = gptimes[1] - 10; + count = 0; + for (i = 1; i <= newNR; i++) { + current = div * int(gptimes[i] / div); + if (last == current) { + count++; + } else { + if (count > 0) + print last, count; + count = 1; + last = current; + } + } + if (count > 0) + print last, count; + print "Average grace-period duration: " sum / newNR " microseconds"; + print "Minimum grace-period duration: " gptimes[1]; + print "50th percentile grace-period duration: " gptimes[pct50]; + print "90th percentile grace-period duration: " gptimes[pct90]; + print "99th percentile grace-period duration: " gptimes[pct99]; + print "Maximum grace-period duration: " gptimes[newNR]; + print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches; + print "Computed from rcuperf printk output."; +}' diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index d86bdd6b6cc2..f659346d3358 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -48,7 +48,10 @@ do cat $i/Make.oldconfig.err fi parse-build.sh $i/Make.out $configfile - parse-torture.sh $i/console.log $configfile + if test "$TORTURE_SUITE" != rcuperf + then + parse-torture.sh $i/console.log $configfile + fi parse-console.sh $i/console.log $configfile if test -r $i/Warnings then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 0f80eefb0bfd..4109f306d855 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -6,7 +6,7 @@ # Execute this in the source tree. Do not run it as a background task # because qemu does not seem to like that much. # -# Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args +# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args # # qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with # arguments specifying the number of CPUs and other @@ -91,25 +91,33 @@ fi # CONFIG_PCMCIA=n # CONFIG_CARDBUS=n # CONFIG_YENTA=n -if kvm-build.sh $config_template $builddir $T +base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` +if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux then + # Rerunning previous test, so use that test's kernel. + QEMU="`identify_qemu $base_resdir/vmlinux`" + KERNEL=$base_resdir/bzImage + ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh + ln -s $base_resdir/.config $resdir # for kvm-recheck.sh +elif kvm-build.sh $config_template $builddir $T +then + # Had to build a kernel for this test. QEMU="`identify_qemu $builddir/vmlinux`" BOOT_IMAGE="`identify_boot_image $QEMU`" cp $builddir/Make*.out $resdir + cp $builddir/vmlinux $resdir cp $builddir/.config $resdir if test -n "$BOOT_IMAGE" then cp $builddir/$BOOT_IMAGE $resdir + KERNEL=$resdir/bzImage else echo No identifiable boot image, not running KVM, see $resdir. echo Do the torture scripts know about your architecture? fi parse-build.sh $resdir/Make.out $title - if test -f $builddir.wait - then - mv $builddir.wait $builddir.ready - fi else + # Build failed. cp $builddir/Make*.out $resdir cp $builddir/.config $resdir || : echo Build failed, not running KVM, see $resdir. @@ -119,12 +127,15 @@ else fi exit 1 fi +if test -f $builddir.wait +then + mv $builddir.wait $builddir.ready +fi while test -f $builddir.ready do sleep 1 done -minutes=$4 -seconds=$(($minutes * 60)) +seconds=$4 qemu_args=$5 boot_args=$6 @@ -167,15 +178,26 @@ then exit 0 fi echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log -echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd -( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) & -qemu_pid=$! +echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd +( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & commandcompleted=0 -echo Monitoring qemu job at pid $qemu_pid +sleep 10 # Give qemu's pid a chance to reach the file +if test -s "$resdir/qemu_pid" +then + qemu_pid=`cat "$resdir/qemu_pid"` + echo Monitoring qemu job at pid $qemu_pid +else + qemu_pid="" + echo Monitoring qemu job at yet-as-unknown pid +fi while : do + if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + fi kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` - if kill -0 $qemu_pid > /dev/null 2>&1 + if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 then if test $kruntime -ge $seconds then @@ -195,12 +217,16 @@ do ps -fp $killpid >> $resdir/Warnings 2>&1 fi else - echo ' ---' `date`: Kernel done + echo ' ---' `date`: "Kernel done" fi break fi done -if test $commandcompleted -eq 0 +if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" +then + qemu_pid=`cat "$resdir/qemu_pid"` +fi +if test $commandcompleted -eq 0 -a -n "$qemu_pid" then echo Grace period for qemu job at pid $qemu_pid while : @@ -220,6 +246,9 @@ then fi sleep 1 done +elif test -z "$qemu_pid" +then + echo Unknown PID, cannot kill qemu command fi parse-torture.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 4a431767f77a..0d598145873e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -34,7 +34,7 @@ T=/tmp/kvm.sh.$$ trap 'rm -rf $T' 0 mkdir $T -dur=30 +dur=$((30*60)) dryrun="" KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM PATH=${KVM}/bin:$PATH; export PATH @@ -48,6 +48,7 @@ resdir="" configs="" cpus=0 ds=`date +%Y.%m.%d-%H:%M:%S` +jitter=0 . functions.sh @@ -63,6 +64,7 @@ usage () { echo " --dryrun sched|script" echo " --duration minutes" echo " --interactive" + echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --no-initrd" @@ -116,12 +118,17 @@ do ;; --duration) checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error' - dur=$2 + dur=$(($2*60)) shift ;; --interactive) TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE ;; + --jitter) + checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$' + jitter="$2" + shift + ;; --kmake-arg) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' TORTURE_KMAKE_ARG="$2" @@ -156,7 +163,7 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--' TORTURE_SUITE=$2 shift ;; @@ -299,6 +306,7 @@ awk < $T/cfgcpu.pack \ -v CONFIGDIR="$CONFIGFRAG/" \ -v KVM="$KVM" \ -v ncpus=$cpus \ + -v jitter="$jitter" \ -v rd=$resdir/$ds/ \ -v dur=$dur \ -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \ @@ -359,6 +367,16 @@ function dump(first, pastlast, batchnum) print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log"; print "fi" } + njitter = 0; + split(jitter, ja); + if (ja[1] == -1 && ncpus == 0) + njitter = 1; + else if (ja[1] == -1) + njitter = ncpus; + else + njitter = ja[1]; + for (j = 0; j < njitter; j++) + print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&" print "wait" print "if test -z \"$TORTURE_BUILDONLY\"" print "then" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 39a2c6d7d7ec..17cbe098b115 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -14,7 +14,7 @@ CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=4 -CONFIG_RCU_FANOUT_LEAF=4 +CONFIG_RCU_FANOUT_LEAF=3 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot index 0fc8a3428938..e34c33430447 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot @@ -1 +1 @@ -rcutorture.torture_type=rcu_bh +rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST new file mode 100644 index 000000000000..c9f56cf20775 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST @@ -0,0 +1 @@ +TREE diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon new file mode 100644 index 000000000000..a09816b8c0f3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon @@ -0,0 +1,2 @@ +CONFIG_RCU_PERF_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE new file mode 100644 index 000000000000..a312f671a29a --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE @@ -0,0 +1,20 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 new file mode 100644 index 000000000000..985fb170d13c --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 @@ -0,0 +1,23 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=54 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh new file mode 100644 index 000000000000..34f2a1b35ee5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# +# Torture-suite-dependent shell functions for the rest of the scripts. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2015 +# +# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + +# rcuperf_param_nreaders bootparam-string +# +# Adds nreaders rcuperf module parameter if not already specified. +rcuperf_param_nreaders () { + if ! echo "$1" | grep -q "rcuperf.nreaders" + then + echo rcuperf.nreaders=-1 + fi +} + +# rcuperf_param_nwriters bootparam-string +# +# Adds nwriters rcuperf module parameter if not already specified. +rcuperf_param_nwriters () { + if ! echo "$1" | grep -q "rcuperf.nwriters" + then + echo rcuperf.nwriters=-1 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `rcuperf_param_nreaders "$1"` \ + `rcuperf_param_nwriters "$1"` \ + rcuperf.perf_runnable=1 \ + rcuperf.shutdown=1 \ + rcuperf.verbose=1 +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index b9453b838162..7947e568e057 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -5,6 +5,7 @@ * Test code for seccomp bpf. */ +#include <sys/types.h> #include <asm/siginfo.h> #define __have_siginfo_t 1 #define __have_sigval_t 1 @@ -14,7 +15,6 @@ #include <linux/filter.h> #include <sys/prctl.h> #include <sys/ptrace.h> -#include <sys/types.h> #include <sys/user.h> #include <linux/prctl.h> #include <linux/ptrace.h> @@ -1242,6 +1242,12 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS s390_regs # define SYSCALL_NUM gprs[2] # define SYSCALL_RET gprs[2] +#elif defined(__mips__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM regs[2] +# define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_RET regs[2] +# define SYSCALL_NUM_RET_SHARE_REG #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1249,7 +1255,7 @@ TEST_F(TRACE_poke, getpid_runs_normally) /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ -#if defined(__x86_64__) || defined(__i386__) +#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) #define HAVE_GETREGS #endif @@ -1273,6 +1279,10 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) } #endif +#if defined(__mips__) + if (regs.SYSCALL_NUM == __NR_O32_Linux) + return regs.SYSCALL_SYSCALL_NUM; +#endif return regs.SYSCALL_NUM; } @@ -1297,6 +1307,13 @@ void change_syscall(struct __test_metadata *_metadata, { regs.SYSCALL_NUM = syscall; } +#elif defined(__mips__) + { + if (regs.SYSCALL_NUM == __NR_O32_Linux) + regs.SYSCALL_SYSCALL_NUM = syscall; + else + regs.SYSCALL_NUM = syscall; + } #elif defined(__arm__) # ifndef PTRACE_SET_SYSCALL @@ -1327,7 +1344,11 @@ void change_syscall(struct __test_metadata *_metadata, /* If syscall is skipped, change return value. */ if (syscall == -1) +#ifdef SYSCALL_NUM_RET_SHARE_REG + TH_LOG("Can't modify syscall return on this architecture"); +#else regs.SYSCALL_RET = 1; +#endif #ifdef HAVE_GETREGS ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); @@ -1465,8 +1486,13 @@ TEST_F(TRACE_syscall, syscall_dropped) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); ASSERT_EQ(0, ret); +#ifdef SYSCALL_NUM_RET_SHARE_REG + /* gettid has been skipped */ + EXPECT_EQ(-1, syscall(__NR_gettid)); +#else /* gettid has been skipped and an altered return value stored. */ EXPECT_EQ(1, syscall(__NR_gettid)); +#endif EXPECT_NE(self->mytid, syscall(__NR_gettid)); } @@ -1497,15 +1523,15 @@ TEST_F(TRACE_syscall, syscall_dropped) #define SECCOMP_SET_MODE_FILTER 1 #endif -#ifndef SECCOMP_FLAG_FILTER_TSYNC -#define SECCOMP_FLAG_FILTER_TSYNC 1 +#ifndef SECCOMP_FILTER_FLAG_TSYNC +#define SECCOMP_FILTER_FLAG_TSYNC 1 #endif #ifndef seccomp -int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter) +int seccomp(unsigned int op, unsigned int flags, void *args) { errno = 0; - return syscall(__NR_seccomp, op, flags, filter); + return syscall(__NR_seccomp, op, flags, args); } #endif @@ -1613,7 +1639,7 @@ TEST(TSYNC_first) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); @@ -1831,7 +1857,7 @@ TEST_F(TSYNC, two_siblings_with_ancestor) self->sibling_count++; } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(0, ret) { TH_LOG("Could install filter on all threads!"); @@ -1892,7 +1918,7 @@ TEST_F(TSYNC, two_siblings_with_no_filter) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); @@ -1940,7 +1966,7 @@ TEST_F(TSYNC, two_siblings_with_one_divergence) self->sibling_count++; } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(self->sibling[0].system_tid, ret) { TH_LOG("Did not fail on diverged sibling."); @@ -1992,7 +2018,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); } - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(ret, self->sibling[0].system_tid) { TH_LOG("Did not fail on diverged sibling."); @@ -2021,7 +2047,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) /* Switch to the remaining sibling */ sib = !sib; - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(0, ret) { TH_LOG("Expected the remaining sibling to sync"); @@ -2044,7 +2070,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) while (!kill(self->sibling[sib].system_tid, 0)) sleep(0.1); - ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); ASSERT_EQ(0, ret); /* just us chickens */ } diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile new file mode 100644 index 000000000000..56af56eda6fa --- /dev/null +++ b/tools/testing/selftests/sigaltstack/Makefile @@ -0,0 +1,8 @@ +CFLAGS = -Wall +BINARIES = sas +all: $(BINARIES) + +include ../lib.mk + +clean: + rm -rf $(BINARIES) diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c new file mode 100644 index 000000000000..1bb01258e559 --- /dev/null +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -0,0 +1,176 @@ +/* + * Stas Sergeev <stsp@users.sourceforge.net> + * + * test sigaltstack(SS_ONSTACK | SS_AUTODISARM) + * If that succeeds, then swapcontext() can be used inside sighandler safely. + * + */ + +#define _GNU_SOURCE +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <ucontext.h> +#include <alloca.h> +#include <string.h> +#include <assert.h> +#include <errno.h> + +#ifndef SS_AUTODISARM +#define SS_AUTODISARM (1U << 31) +#endif + +static void *sstack, *ustack; +static ucontext_t uc, sc; +static const char *msg = "[OK]\tStack preserved"; +static const char *msg2 = "[FAIL]\tStack corrupted"; +struct stk_data { + char msg[128]; + int flag; +}; + +void my_usr1(int sig, siginfo_t *si, void *u) +{ + char *aa; + int err; + stack_t stk; + struct stk_data *p; + + register unsigned long sp asm("sp"); + + if (sp < (unsigned long)sstack || + sp >= (unsigned long)sstack + SIGSTKSZ) { + printf("[FAIL]\tSP is not on sigaltstack\n"); + exit(EXIT_FAILURE); + } + /* put some data on stack. other sighandler will try to overwrite it */ + aa = alloca(1024); + assert(aa); + p = (struct stk_data *)(aa + 512); + strcpy(p->msg, msg); + p->flag = 1; + printf("[RUN]\tsignal USR1\n"); + err = sigaltstack(NULL, &stk); + if (err) { + perror("[FAIL]\tsigaltstack()"); + exit(EXIT_FAILURE); + } + if (stk.ss_flags != SS_DISABLE) + printf("[FAIL]\tss_flags=%i, should be SS_DISABLE\n", + stk.ss_flags); + else + printf("[OK]\tsigaltstack is disabled in sighandler\n"); + swapcontext(&sc, &uc); + printf("%s\n", p->msg); + if (!p->flag) { + printf("[RUN]\tAborting\n"); + exit(EXIT_FAILURE); + } +} + +void my_usr2(int sig, siginfo_t *si, void *u) +{ + char *aa; + struct stk_data *p; + + printf("[RUN]\tsignal USR2\n"); + aa = alloca(1024); + /* dont run valgrind on this */ + /* try to find the data stored by previous sighandler */ + p = memmem(aa, 1024, msg, strlen(msg)); + if (p) { + printf("[FAIL]\tsigaltstack re-used\n"); + /* corrupt the data */ + strcpy(p->msg, msg2); + /* tell other sighandler that his data is corrupted */ + p->flag = 0; + } +} + +static void switch_fn(void) +{ + printf("[RUN]\tswitched to user ctx\n"); + raise(SIGUSR2); + setcontext(&sc); +} + +int main(void) +{ + struct sigaction act; + stack_t stk; + int err; + + sigemptyset(&act.sa_mask); + act.sa_flags = SA_ONSTACK | SA_SIGINFO; + act.sa_sigaction = my_usr1; + sigaction(SIGUSR1, &act, NULL); + act.sa_sigaction = my_usr2; + sigaction(SIGUSR2, &act, NULL); + sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (sstack == MAP_FAILED) { + perror("mmap()"); + return EXIT_FAILURE; + } + + err = sigaltstack(NULL, &stk); + if (err) { + perror("[FAIL]\tsigaltstack()"); + exit(EXIT_FAILURE); + } + if (stk.ss_flags == SS_DISABLE) { + printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n"); + } else { + printf("[FAIL]\tInitial sigaltstack state was %i; should have been SS_DISABLE\n", stk.ss_flags); + return EXIT_FAILURE; + } + + stk.ss_sp = sstack; + stk.ss_size = SIGSTKSZ; + stk.ss_flags = SS_ONSTACK | SS_AUTODISARM; + err = sigaltstack(&stk, NULL); + if (err) { + if (errno == EINVAL) { + printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n"); + /* + * If test cases for the !SS_AUTODISARM variant were + * added, we could still run them. We don't have any + * test cases like that yet, so just exit and report + * success. + */ + return 0; + } else { + perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)"); + return EXIT_FAILURE; + } + } + + ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (ustack == MAP_FAILED) { + perror("mmap()"); + return EXIT_FAILURE; + } + getcontext(&uc); + uc.uc_link = NULL; + uc.uc_stack.ss_sp = ustack; + uc.uc_stack.ss_size = SIGSTKSZ; + makecontext(&uc, switch_fn, 0); + raise(SIGUSR1); + + err = sigaltstack(NULL, &stk); + if (err) { + perror("[FAIL]\tsigaltstack()"); + exit(EXIT_FAILURE); + } + if (stk.ss_flags != SS_AUTODISARM) { + printf("[FAIL]\tss_flags=%i, should be SS_AUTODISARM\n", + stk.ss_flags); + exit(EXIT_FAILURE); + } + printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n"); + + printf("[OK]\tTest passed\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d5ce7d7aae3e..c73425de3cfe 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,10 +5,11 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ - check_initial_reg_state sigreturn ldt_gdt + check_initial_reg_state sigreturn ldt_gdt iopl TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer +TARGETS_C_64BIT_ONLY := fsgsbase TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c new file mode 100644 index 000000000000..5b2b4b3c634c --- /dev/null +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -0,0 +1,398 @@ +/* + * fsgsbase.c, an fsgsbase test + * Copyright (c) 2014-2016 Andy Lutomirski + * GPL v2 + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <err.h> +#include <sys/user.h> +#include <asm/prctl.h> +#include <sys/prctl.h> +#include <signal.h> +#include <limits.h> +#include <sys/ucontext.h> +#include <sched.h> +#include <linux/futex.h> +#include <pthread.h> +#include <asm/ldt.h> +#include <sys/mman.h> + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +static volatile sig_atomic_t want_segv; +static volatile unsigned long segv_addr; + +static int nerrs; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (!want_segv) { + clearhandler(SIGSEGV); + return; /* Crash cleanly. */ + } + + want_segv = false; + segv_addr = (unsigned long)si->si_addr; + + ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */ + +} + +enum which_base { FS, GS }; + +static unsigned long read_base(enum which_base which) +{ + unsigned long offset; + /* + * Unless we have FSGSBASE, there's no direct way to do this from + * user mode. We can get at it indirectly using signals, though. + */ + + want_segv = true; + + offset = 0; + if (which == FS) { + /* Use a constant-length instruction here. */ + asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax"); + } else { + asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax"); + } + if (!want_segv) + return segv_addr + offset; + + /* + * If that didn't segfault, try the other end of the address space. + * Unless we get really unlucky and run into the vsyscall page, this + * is guaranteed to segfault. + */ + + offset = (ULONG_MAX >> 1) + 1; + if (which == FS) { + asm volatile ("mov %%fs:(%%rcx), %%rax" + : : "c" (offset) : "rax"); + } else { + asm volatile ("mov %%gs:(%%rcx), %%rax" + : : "c" (offset) : "rax"); + } + if (!want_segv) + return segv_addr + offset; + + abort(); +} + +static void check_gs_value(unsigned long value) +{ + unsigned long base; + unsigned short sel; + + printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0) + err(1, "ARCH_SET_GS"); + + asm volatile ("mov %%gs, %0" : "=rm" (sel)); + base = read_base(GS); + if (base == value) { + printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n", + sel); + } else { + nerrs++; + printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n", + base, sel); + } + + if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0) + err(1, "ARCH_GET_GS"); + if (base == value) { + printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n", + sel); + } else { + nerrs++; + printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n", + base, sel); + } +} + +static void mov_0_gs(unsigned long initial_base, bool schedule) +{ + unsigned long base, arch_base; + + printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : ""); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0) + err(1, "ARCH_SET_GS"); + + if (schedule) + usleep(10); + + asm volatile ("mov %0, %%gs" : : "rm" (0)); + base = read_base(GS); + if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0) + err(1, "ARCH_GET_GS"); + if (base == arch_base) { + printf("[OK]\tGSBASE is 0x%lx\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base); + } +} + +static volatile unsigned long remote_base; +static volatile bool remote_hard_zero; +static volatile unsigned int ftx; + +/* + * ARCH_SET_FS/GS(0) may or may not program a selector of zero. HARD_ZERO + * means to force the selector to zero to improve test coverage. + */ +#define HARD_ZERO 0xa1fa5f343cb85fa4 + +static void do_remote_base() +{ + unsigned long to_set = remote_base; + bool hard_zero = false; + if (to_set == HARD_ZERO) { + to_set = 0; + hard_zero = true; + } + + if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0) + err(1, "ARCH_SET_GS"); + + if (hard_zero) + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); + + unsigned short sel; + asm volatile ("mov %%gs, %0" : "=rm" (sel)); + printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n", + to_set, hard_zero ? " and clear gs" : "", sel); +} + +void do_unexpected_base(void) +{ + /* + * The goal here is to try to arrange for GS == 0, GSBASE != + * 0, and for the the kernel the think that GSBASE == 0. + * + * To make the test as reliable as possible, this uses + * explicit descriptorss. (This is not the only way. This + * could use ARCH_SET_GS with a low, nonzero base, but the + * relevant side effect of ARCH_SET_GS could change.) + */ + + /* Step 1: tell the kernel that we have GSBASE == 0. */ + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0) + err(1, "ARCH_SET_GS"); + + /* Step 2: change GSBASE without telling the kernel. */ + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0xBAADF00D, + .limit = 0xfffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 1, + .seg_not_present = 0, + .useable = 0 + }; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { + printf("\tother thread: using LDT slot 0\n"); + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); + } else { + /* No modify_ldt for us (configured out, perhaps) */ + + struct user_desc *low_desc = mmap( + NULL, sizeof(desc), + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + memcpy(low_desc, &desc, sizeof(desc)); + + low_desc->entry_number = -1; + + /* 32-bit set_thread_area */ + long ret; + asm volatile ("int $0x80" + : "=a" (ret) : "a" (243), "b" (low_desc) + : "flags"); + memcpy(&desc, low_desc, sizeof(desc)); + munmap(low_desc, sizeof(desc)); + + if (ret != 0) { + printf("[NOTE]\tcould not create a segment -- test won't do anything\n"); + return; + } + printf("\tother thread: using GDT slot %d\n", desc.entry_number); + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3))); + } + + /* + * Step 3: set the selector back to zero. On AMD chips, this will + * preserve GSBASE. + */ + + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); +} + +static void *threadproc(void *ctx) +{ + while (1) { + while (ftx == 0) + syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0); + if (ftx == 3) + return NULL; + + if (ftx == 1) + do_remote_base(); + else if (ftx == 2) + do_unexpected_base(); + else + errx(1, "helper thread got bad command"); + + ftx = 0; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + } +} + +static void set_gs_and_switch_to(unsigned long local, unsigned long remote) +{ + unsigned long base; + + bool hard_zero = false; + if (local == HARD_ZERO) { + hard_zero = true; + local = 0; + } + + printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n", + local, hard_zero ? " and clear gs" : "", remote); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0) + err(1, "ARCH_SET_GS"); + if (hard_zero) + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); + + if (read_base(GS) != local) { + nerrs++; + printf("[FAIL]\tGSBASE wasn't set as expected\n"); + } + + remote_base = remote; + ftx = 1; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + while (ftx != 0) + syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0); + + base = read_base(GS); + if (base == local) { + printf("[OK]\tGSBASE remained 0x%lx\n", local); + } else { + nerrs++; + printf("[FAIL]\tGSBASE changed to 0x%lx\n", base); + } +} + +static void test_unexpected_base(void) +{ + unsigned long base; + + printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0) + err(1, "ARCH_SET_GS"); + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); + + ftx = 2; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + while (ftx != 0) + syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0); + + base = read_base(GS); + if (base == 0) { + printf("[OK]\tGSBASE remained 0\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE changed to 0x%lx\n", base); + } +} + +int main() +{ + pthread_t thread; + + sethandler(SIGSEGV, sigsegv, 0); + + check_gs_value(0); + check_gs_value(1); + check_gs_value(0x200000000); + check_gs_value(0); + check_gs_value(0x200000000); + check_gs_value(1); + + for (int sched = 0; sched < 2; sched++) { + mov_0_gs(0, !!sched); + mov_0_gs(1, !!sched); + mov_0_gs(0x200000000, !!sched); + } + + /* Set up for multithreading. */ + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); /* should never fail */ + + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + + static unsigned long bases_with_hard_zero[] = { + 0, HARD_ZERO, 1, 0x200000000, + }; + + for (int local = 0; local < 4; local++) { + for (int remote = 0; remote < 4; remote++) { + set_gs_and_switch_to(bases_with_hard_zero[local], + bases_with_hard_zero[remote]); + } + } + + test_unexpected_base(); + + ftx = 3; /* Kill the thread. */ + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + if (pthread_join(thread, NULL) != 0) + err(1, "pthread_join"); + + return nerrs == 0 ? 0 : 1; +} diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c new file mode 100644 index 000000000000..c496ca97bc18 --- /dev/null +++ b/tools/testing/selftests/x86/iopl.c @@ -0,0 +1,135 @@ +/* + * iopl.c - Test case for a Linux on Xen 64-bit bug + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include <err.h> +#include <stdio.h> +#include <stdint.h> +#include <signal.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdbool.h> +#include <sched.h> +#include <sys/io.h> + +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +int main(void) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + /* Probe for iopl support. Note that iopl(0) works even as nonroot. */ + if (iopl(3) != 0) { + printf("[OK]\tiopl(3) failed (%d) -- try running as root\n", + errno); + return 0; + } + + /* Restore our original state prior to starting the test. */ + if (iopl(0) != 0) + err(1, "iopl(0)"); + + pid_t child = fork(); + if (child == -1) + err(1, "fork"); + + if (child == 0) { + printf("\tchild: set IOPL to 3\n"); + if (iopl(3) != 0) + err(1, "iopl"); + + printf("[RUN]\tchild: write to 0x80\n"); + asm volatile ("outb %%al, $0x80" : : "a" (0)); + + return 0; + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + printf("[RUN]\tparent: write to 0x80 (should fail)\n"); + + sethandler(SIGSEGV, sigsegv, 0); + if (sigsetjmp(jmpbuf, 1) != 0) { + printf("[OK]\twrite was denied\n"); + } else { + asm volatile ("outb %%al, $0x80" : : "a" (0)); + printf("[FAIL]\twrite was allowed\n"); + nerrs++; + } + + /* Test the capability checks. */ + printf("\tiopl(3)\n"); + if (iopl(3) != 0) + err(1, "iopl(3)"); + + printf("\tDrop privileges\n"); + if (setresuid(1, 1, 1) != 0) { + printf("[WARN]\tDropping privileges failed\n"); + goto done; + } + + printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n"); + if (iopl(3) != 0) { + printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n"); + nerrs++; + } + + printf("[RUN]\tiopl(0) unprivileged\n"); + if (iopl(0) != 0) { + printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n"); + nerrs++; + } + + printf("[RUN]\tiopl(3) unprivileged\n"); + if (iopl(3) == 0) { + printf("[FAIL]\tiopl(3) should fail if when unprivileged if iopl==0\n"); + nerrs++; + } else { + printf("[OK]\tFailed as expected\n"); + } + +done: + return nerrs ? 1 : 0; +} + diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 31a3035cd4eb..4af47079cf04 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -21,6 +21,9 @@ #include <pthread.h> #include <sched.h> #include <linux/futex.h> +#include <sys/mman.h> +#include <asm/prctl.h> +#include <sys/prctl.h> #define AR_ACCESSED (1<<8) @@ -44,6 +47,12 @@ static int nerrs; +/* Points to an array of 1024 ints, each holding its own index. */ +static const unsigned int *counter_page; +static struct user_desc *low_user_desc; +static struct user_desc *low_user_desc_clear; /* Use to delete GDT entry */ +static int gdt_entry_num; + static void check_invalid_segment(uint16_t index, int ldt) { uint32_t has_limit = 0, has_ar = 0, limit, ar; @@ -561,16 +570,257 @@ static void do_exec_test(void) } } +static void setup_counter_page(void) +{ + unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0); + if (page == MAP_FAILED) + err(1, "mmap"); + + for (int i = 0; i < 1024; i++) + page[i] = i; + counter_page = page; +} + +static int invoke_set_thread_area(void) +{ + int ret; + asm volatile ("int $0x80" + : "=a" (ret), "+m" (low_user_desc) : + "a" (243), "b" (low_user_desc) + : "flags"); + return ret; +} + +static void setup_low_user_desc(void) +{ + low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc), + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0); + if (low_user_desc == MAP_FAILED) + err(1, "mmap"); + + low_user_desc->entry_number = -1; + low_user_desc->base_addr = (unsigned long)&counter_page[1]; + low_user_desc->limit = 0xfffff; + low_user_desc->seg_32bit = 1; + low_user_desc->contents = 0; /* Data, grow-up*/ + low_user_desc->read_exec_only = 0; + low_user_desc->limit_in_pages = 1; + low_user_desc->seg_not_present = 0; + low_user_desc->useable = 0; + + if (invoke_set_thread_area() == 0) { + gdt_entry_num = low_user_desc->entry_number; + printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num); + } else { + printf("[NOTE]\tset_thread_area is unavailable\n"); + } + + low_user_desc_clear = low_user_desc + 1; + low_user_desc_clear->entry_number = gdt_entry_num; + low_user_desc_clear->read_exec_only = 1; + low_user_desc_clear->seg_not_present = 1; +} + +static void test_gdt_invalidation(void) +{ + if (!gdt_entry_num) + return; /* 64-bit only system -- we can't use set_thread_area */ + + unsigned short prev_sel; + unsigned short sel; + unsigned int eax; + const char *result; +#ifdef __x86_64__ + unsigned long saved_base; + unsigned long new_base; +#endif + + /* Test DS */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; + asm volatile ("movw %%ds, %[prev_sel]\n\t" + "movw %[sel], %%ds\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate ds */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%ds, %[sel]\n\t" + "movw %[prev_sel], %%ds" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n", + result, sel); + + /* Test ES */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; + asm volatile ("movw %%es, %[prev_sel]\n\t" + "movw %[sel], %%es\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate es */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%es, %[sel]\n\t" + "movw %[prev_sel], %%es" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n", + result, sel); + + /* Test FS */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base); +#endif + asm volatile ("movw %%fs, %[prev_sel]\n\t" + "movw %[sel], %%fs\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate fs */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%fs, %[sel]\n\t" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); +#endif + + /* Restore FS/BASE for glibc */ + asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel)); +#ifdef __x86_64__ + if (saved_base) + syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base); +#endif + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n", + result, sel); + +#ifdef __x86_64__ + if (sel == 0 && new_base != 0) { + nerrs++; + printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base); + } else { + printf("[OK]\tNew FSBASE was zero\n"); + } +#endif + + /* Test GS */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base); +#endif + asm volatile ("movw %%gs, %[prev_sel]\n\t" + "movw %[sel], %%gs\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate gs */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%gs, %[sel]\n\t" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); +#endif + + /* Restore GS/BASE for glibc */ + asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel)); +#ifdef __x86_64__ + if (saved_base) + syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base); +#endif + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n", + result, sel); + +#ifdef __x86_64__ + if (sel == 0 && new_base != 0) { + nerrs++; + printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base); + } else { + printf("[OK]\tNew GSBASE was zero\n"); + } +#endif +} + int main(int argc, char **argv) { if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec")) return finish_exec_test(); + setup_counter_page(); + setup_low_user_desc(); + do_simple_tests(); do_multicpu_tests(); do_exec_test(); + test_gdt_invalidation(); + return nerrs ? 1 : 0; } diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h new file mode 100644 index 000000000000..4f93af89ae16 --- /dev/null +++ b/tools/virtio/linux/dma-mapping.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_DMA_MAPPING_H +#define _LINUX_DMA_MAPPING_H + +#ifdef CONFIG_HAS_DMA +# error Virtio userspace code does not support CONFIG_HAS_DMA +#endif + +#define PCI_DMA_BUS_IS_PHYS 1 + +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +#endif diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 5a6016224bb9..e92903fc7113 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -61,6 +61,8 @@ #define PM_PFRAME_BITS 55 #define PM_PFRAME_MASK ((1LL << PM_PFRAME_BITS) - 1) #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) +#define MAX_SWAPFILES_SHIFT 5 +#define PM_SWAP_OFFSET(x) (((x) & PM_PFRAME_MASK) >> MAX_SWAPFILES_SHIFT) #define PM_SOFT_DIRTY (1ULL << 55) #define PM_MMAP_EXCLUSIVE (1ULL << 56) #define PM_FILE (1ULL << 61) @@ -73,6 +75,7 @@ #define KPF_BYTES 8 #define PROC_KPAGEFLAGS "/proc/kpageflags" +#define PROC_KPAGECGROUP "/proc/kpagecgroup" /* [32-] kernel hacking assistances */ #define KPF_RESERVED 32 @@ -92,7 +95,8 @@ #define KPF_SLOB_FREE 49 #define KPF_SLUB_FROZEN 50 #define KPF_SLUB_DEBUG 51 -#define KPF_FILE 62 +#define KPF_FILE 61 +#define KPF_SWAP 62 #define KPF_MMAP_EXCLUSIVE 63 #define KPF_ALL_BITS ((uint64_t)~0ULL) @@ -146,6 +150,7 @@ static const char * const page_flag_names[] = { [KPF_SLUB_DEBUG] = "E:slub_debug", [KPF_FILE] = "F:file", + [KPF_SWAP] = "w:swap", [KPF_MMAP_EXCLUSIVE] = "1:mmap_exclusive", }; @@ -164,7 +169,9 @@ static int opt_raw; /* for kernel developers */ static int opt_list; /* list pages (in ranges) */ static int opt_no_summary; /* don't show summary */ static pid_t opt_pid; /* process to walk */ -const char * opt_file; +const char * opt_file; /* file or directory path */ +static uint64_t opt_cgroup; /* cgroup inode */ +static int opt_list_cgroup;/* list page cgroup */ #define MAX_ADDR_RANGES 1024 static int nr_addr_ranges; @@ -185,6 +192,7 @@ static int page_size; static int pagemap_fd; static int kpageflags_fd; +static int kpagecgroup_fd = -1; static int opt_hwpoison; static int opt_unpoison; @@ -278,6 +286,16 @@ static unsigned long kpageflags_read(uint64_t *buf, return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); } +static unsigned long kpagecgroup_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + if (kpagecgroup_fd < 0) + return pages; + + return do_u64_read(kpagecgroup_fd, PROC_KPAGEFLAGS, buf, index, pages); +} + static unsigned long pagemap_read(uint64_t *buf, unsigned long index, unsigned long pages) @@ -297,6 +315,10 @@ static unsigned long pagemap_pfn(uint64_t val) return pfn; } +static unsigned long pagemap_swap_offset(uint64_t val) +{ + return val & PM_SWAP ? PM_SWAP_OFFSET(val) : 0; +} /* * page flag names @@ -346,14 +368,15 @@ static char *page_flag_longname(uint64_t flags) */ static void show_page_range(unsigned long voffset, unsigned long offset, - unsigned long size, uint64_t flags) + unsigned long size, uint64_t flags, uint64_t cgroup) { static uint64_t flags0; + static uint64_t cgroup0; static unsigned long voff; static unsigned long index; static unsigned long count; - if (flags == flags0 && offset == index + count && + if (flags == flags0 && cgroup == cgroup0 && offset == index + count && size && voffset == voff + count) { count += size; return; @@ -364,11 +387,14 @@ static void show_page_range(unsigned long voffset, unsigned long offset, printf("%lx\t", voff); if (opt_file) printf("%lu\t", voff); + if (opt_list_cgroup) + printf("@%llu\t", (unsigned long long)cgroup0); printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); } flags0 = flags; + cgroup0= cgroup; index = offset; voff = voffset; count = size; @@ -376,16 +402,18 @@ static void show_page_range(unsigned long voffset, unsigned long offset, static void flush_page_range(void) { - show_page_range(0, 0, 0, 0); + show_page_range(0, 0, 0, 0, 0); } -static void show_page(unsigned long voffset, - unsigned long offset, uint64_t flags) +static void show_page(unsigned long voffset, unsigned long offset, + uint64_t flags, uint64_t cgroup) { if (opt_pid) printf("%lx\t", voffset); if (opt_file) printf("%lu\t", voffset); + if (opt_list_cgroup) + printf("@%llu\t", (unsigned long long)cgroup); printf("%lx\t%s\n", offset, page_flag_name(flags)); } @@ -452,6 +480,8 @@ static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) flags |= BIT(SOFTDIRTY); if (pme & PM_FILE) flags |= BIT(FILE); + if (pme & PM_SWAP) + flags |= BIT(SWAP); if (pme & PM_MMAP_EXCLUSIVE) flags |= BIT(MMAP_EXCLUSIVE); @@ -566,23 +596,26 @@ static size_t hash_slot(uint64_t flags) exit(EXIT_FAILURE); } -static void add_page(unsigned long voffset, - unsigned long offset, uint64_t flags, uint64_t pme) +static void add_page(unsigned long voffset, unsigned long offset, + uint64_t flags, uint64_t cgroup, uint64_t pme) { flags = kpageflags_flags(flags, pme); if (!bit_mask_ok(flags)) return; + if (opt_cgroup && cgroup != (uint64_t)opt_cgroup) + return; + if (opt_hwpoison) hwpoison_page(offset); if (opt_unpoison) unpoison_page(offset); if (opt_list == 1) - show_page_range(voffset, offset, 1, flags); + show_page_range(voffset, offset, 1, flags, cgroup); else if (opt_list == 2) - show_page(voffset, offset, flags); + show_page(voffset, offset, flags, cgroup); nr_pages[hash_slot(flags)]++; total_pages++; @@ -595,24 +628,57 @@ static void walk_pfn(unsigned long voffset, uint64_t pme) { uint64_t buf[KPAGEFLAGS_BATCH]; + uint64_t cgi[KPAGEFLAGS_BATCH]; unsigned long batch; unsigned long pages; unsigned long i; + /* + * kpagecgroup_read() reads only if kpagecgroup were opened, but + * /proc/kpagecgroup might even not exist, so it's better to fill + * them with zeros here. + */ + if (count == 1) + cgi[0] = 0; + else + memset(cgi, 0, sizeof cgi); + while (count) { batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); pages = kpageflags_read(buf, index, batch); if (pages == 0) break; + if (kpagecgroup_read(cgi, index, pages) != pages) + fatal("kpagecgroup returned fewer pages than expected"); + for (i = 0; i < pages; i++) - add_page(voffset + i, index + i, buf[i], pme); + add_page(voffset + i, index + i, buf[i], cgi[i], pme); index += pages; count -= pages; } } +static void walk_swap(unsigned long voffset, uint64_t pme) +{ + uint64_t flags = kpageflags_flags(0, pme); + + if (!bit_mask_ok(flags)) + return; + + if (opt_cgroup) + return; + + if (opt_list == 1) + show_page_range(voffset, pagemap_swap_offset(pme), 1, flags, 0); + else if (opt_list == 2) + show_page(voffset, pagemap_swap_offset(pme), flags, 0); + + nr_pages[hash_slot(flags)]++; + total_pages++; +} + #define PAGEMAP_BATCH (64 << 10) static void walk_vma(unsigned long index, unsigned long count) { @@ -632,6 +698,8 @@ static void walk_vma(unsigned long index, unsigned long count) pfn = pagemap_pfn(buf[i]); if (pfn) walk_pfn(index + i, pfn, 1, buf[i]); + if (buf[i] & PM_SWAP) + walk_swap(index + i, buf[i]); } index += pages; @@ -713,10 +781,12 @@ static void usage(void) " -d|--describe flags Describe flags\n" " -a|--addr addr-spec Walk a range of pages\n" " -b|--bits bits-spec Walk pages with specified bits\n" +" -c|--cgroup path|@inode Walk pages within memory cgroup\n" " -p|--pid pid Walk process address space\n" " -f|--file filename Walk file address space\n" " -l|--list Show page details in ranges\n" " -L|--list-each Show page details one by one\n" +" -C|--list-cgroup Show cgroup inode for pages\n" " -N|--no-summary Don't show summary info\n" " -X|--hwpoison hwpoison pages\n" " -x|--unpoison unpoison pages\n" @@ -851,6 +921,7 @@ static void walk_file(const char *name, const struct stat *st) { uint8_t vec[PAGEMAP_BATCH]; uint64_t buf[PAGEMAP_BATCH], flags; + uint64_t cgroup = 0; unsigned long nr_pages, pfn, i; off_t off, end = st->st_size; int fd; @@ -908,12 +979,15 @@ got_sigbus: continue; if (!kpageflags_read(&flags, pfn, 1)) continue; + if (!kpagecgroup_read(&cgroup, pfn, 1)) + fatal("kpagecgroup_read failed"); if (first && opt_list) { first = 0; flush_page_range(); show_file(name, st); } - add_page(off / page_size + i, pfn, flags, buf[i]); + add_page(off / page_size + i, pfn, + flags, cgroup, buf[i]); } } @@ -965,6 +1039,24 @@ static void parse_file(const char *name) opt_file = name; } +static void parse_cgroup(const char *path) +{ + if (path[0] == '@') { + opt_cgroup = parse_number(path + 1); + return; + } + + struct stat st; + + if (stat(path, &st)) + fatal("stat failed: %s: %m\n", path); + + if (!S_ISDIR(st.st_mode)) + fatal("cgroup supposed to be a directory: %s\n", path); + + opt_cgroup = st.st_ino; +} + static void parse_addr_range(const char *optarg) { unsigned long offset; @@ -1088,9 +1180,11 @@ static const struct option opts[] = { { "file" , 1, NULL, 'f' }, { "addr" , 1, NULL, 'a' }, { "bits" , 1, NULL, 'b' }, + { "cgroup" , 1, NULL, 'c' }, { "describe" , 1, NULL, 'd' }, { "list" , 0, NULL, 'l' }, { "list-each" , 0, NULL, 'L' }, + { "list-cgroup", 0, NULL, 'C' }, { "no-summary", 0, NULL, 'N' }, { "hwpoison" , 0, NULL, 'X' }, { "unpoison" , 0, NULL, 'x' }, @@ -1105,7 +1199,7 @@ int main(int argc, char *argv[]) page_size = getpagesize(); while ((c = getopt_long(argc, argv, - "rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) { + "rp:f:a:b:d:c:ClLNXxh", opts, NULL)) != -1) { switch (c) { case 'r': opt_raw = 1; @@ -1122,6 +1216,12 @@ int main(int argc, char *argv[]) case 'b': parse_bits_mask(optarg); break; + case 'c': + parse_cgroup(optarg); + break; + case 'C': + opt_list_cgroup = 1; + break; case 'd': describe_flags(optarg); exit(0); @@ -1151,10 +1251,15 @@ int main(int argc, char *argv[]) } } + if (opt_cgroup || opt_list_cgroup) + kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY); + if (opt_list && opt_pid) printf("voffset\t"); if (opt_list && opt_file) printf("foffset\t"); + if (opt_list && opt_list_cgroup) + printf("cgroup\t"); if (opt_list == 1) printf("offset\tlen\tflags\n"); if (opt_list == 2) |