From cfbd41b786519d4a15e1c15181556689bcf6635a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 15 Apr 2020 12:31:26 -0300 Subject: perf stat: Honour --timeout for forked workloads When --timeout is used and a workload is specified to be started by 'perf stat', i.e. $ perf stat --timeout 1000 sleep 1h The --timeout wasn't being honoured, i.e. the workload, 'sleep 1h' in the above example, should be terminated after 1000ms, but it wasn't, 'perf stat' was waiting for it to finish. Fix it by sending a SIGTERM when the timeout expires. Now it works: # perf stat -e cycles --timeout 1234 sleep 1h sleep: Terminated Performance counter stats for 'sleep 1h': 1,066,692 cycles 1.234314838 seconds time elapsed 0.000750000 seconds user 0.000000000 seconds sys # Fixes: f1f8ad52f8bf ("perf stat: Add support to print counts after a period of time") Reported-by: Konstantin Kharlamov Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207243 Tested-by: Konstantin Kharlamov Cc: Adrian Hunter Acked-by: Jiri Olsa Tested-by: Jiri Olsa Cc: Namhyung Kim Cc: yuzhoujian Link: https://lore.kernel.org/lkml/20200415153803.GB20324@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ec053dc1e35c..9207b6c45475 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -686,8 +686,11 @@ try_again_reset: break; } } - if (child_pid != -1) + if (child_pid != -1) { + if (timeout) + kill(child_pid, SIGTERM); wait4(child_pid, &status, 0, &stat_config.ru_data); + } if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); -- cgit v1.2.3-59-g8ed1b From 943930e4729a64c11142a0370415663b39189996 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:08 +0100 Subject: perf tools: Synthesize bpf_trampoline/dispatcher ksymbol event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synthesize bpf images (trampolines/dispatchers) on start, as ksymbol events from /proc/kallsyms. Having this perf can recognize samples from those images and perf report and top shows them correctly. The rest of the ksymbol handling is already in place from for the bpf programs monitoring, so only the initial state was needed. perf report output: # Overhead Command Shared Object Symbol 12.37% test_progs [kernel.vmlinux] [k] entry_SYSCALL_64 11.80% test_progs [kernel.vmlinux] [k] syscall_return_via_sysret 9.63% test_progs bpf_prog_bcf7977d3b93787c_prog2 [k] bpf_prog_bcf7977d3b93787c_prog2 6.90% test_progs bpf_trampoline_24456 [k] bpf_trampoline_24456 6.36% test_progs [kernel.vmlinux] [k] memcpy_erms Committer notes: Use scnprintf() instead of strncpy() to overcome this on fedora:32, rawhide and OpenMandriva Cooker: CC /tmp/build/perf/util/bpf-event.o In file included from /usr/include/string.h:495, from /git/linux/tools/lib/bpf/libbpf_common.h:12, from /git/linux/tools/lib/bpf/bpf.h:31, from util/bpf-event.c:4: In function 'strncpy', inlined from 'process_bpf_image' at util/bpf-event.c:323:2, inlined from 'kallsyms_process_symbol' at util/bpf-event.c:358:9: /usr/include/bits/string_fortified.h:106:10: error: '__builtin_strncpy' specified bound 256 equals destination size [-Werror=stringop-truncation] 106 | return __builtin___strncpy_chk (__dest, __src, __len, __bos (__dest)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Signed-off-by: Jiri Olsa Acked-by: Song Liu Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Björn Töpel Cc: Daniel Borkmann Cc: David S. Miller Cc: Jakub Kicinski Cc: Jesper Dangaard Brouer Cc: John Fastabend Cc: Martin KaFai Lau Cc: Yonghong Song Link: https://lore.kernel.org/bpf/20200312195610.346362-14-jolsa@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 93 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index a3207d900339..0cd41a862952 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include #include "bpf-event.h" #include "debug.h" #include "dso.h" @@ -290,11 +293,82 @@ out: return err ? -1 : 0; } +struct kallsyms_parse { + union perf_event *event; + perf_event__handler_t process; + struct machine *machine; + struct perf_tool *tool; +}; + +static int +process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data) +{ + struct machine *machine = data->machine; + union perf_event *event = data->event; + struct perf_record_ksymbol *ksymbol; + int len; + + ksymbol = &event->ksymbol; + + *ksymbol = (struct perf_record_ksymbol) { + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = offsetof(struct perf_record_ksymbol, name), + }, + .addr = addr, + .len = page_size, + .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, + .flags = 0, + }; + + len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name); + ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64)); + memset((void *) event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + + return perf_tool__process_synth_event(data->tool, event, machine, + data->process); +} + +static int +kallsyms_process_symbol(void *data, const char *_name, + char type __maybe_unused, u64 start) +{ + char disp[KSYM_NAME_LEN]; + char *module, *name; + unsigned long id; + int err = 0; + + module = strchr(_name, '\t'); + if (!module) + return 0; + + /* We are going after [bpf] module ... */ + if (strcmp(module + 1, "[bpf]")) + return 0; + + name = memdup(_name, (module - _name) + 1); + if (!name) + return -ENOMEM; + + name[module - _name] = 0; + + /* .. and only for trampolines and dispatchers */ + if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) || + (sscanf(name, "bpf_dispatcher_%s", disp) == 1)) + err = process_bpf_image(name, start, data); + + free(name); + return err; +} + int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts) { + const char *kallsyms_filename = "/proc/kallsyms"; + struct kallsyms_parse arg; union perf_event *event; __u32 id = 0; int err; @@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; + + /* Synthesize all the bpf programs in system. */ while (true) { err = bpf_prog_get_next_id(id, &id); if (err) { @@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, break; } } + + /* Synthesize all the bpf images - trampolines/dispatchers. */ + if (symbol_conf.kallsyms_name != NULL) + kallsyms_filename = symbol_conf.kallsyms_name; + + arg = (struct kallsyms_parse) { + .event = event, + .process = process, + .machine = machine, + .tool = session->tool, + }; + + if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) { + pr_err("%s: failed to synthesize bpf images: %s\n", + __func__, strerror(errno)); + } + free(event); return err; } -- cgit v1.2.3-59-g8ed1b From 7eddf7e74e54aea3b24410b3fb8911927836632f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:09 +0100 Subject: perf machine: Set ksymbol dso as loaded on arrival MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no special load action for ksymbol data on map__load/dso__load action, where the kernel is getting loaded. It only gets confused with kernel kallsyms/vmlinux load for bpf object, which fails and could mess up with the map. Disabling any further load of the map for ksymbol related dso/map. Signed-off-by: Jiri Olsa Acked-by: Song Liu Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Björn Töpel Cc: Daniel Borkmann Cc: David S. Miller Cc: Jakub Kicinski Cc: Jesper Dangaard Brouer Cc: John Fastabend Cc: Martin KaFai Lau Cc: Yonghong Song Link: https://lore.kernel.org/bpf/20200312195610.346362-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 97142e9671be..06aa4e4db63d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -759,6 +759,7 @@ static int machine__process_ksymbol_register(struct machine *machine, map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); + dso__set_loaded(dso); } sym = symbol__new(map->map_ip(map, map->start), -- cgit v1.2.3-59-g8ed1b From 3c29d4483e855b6ba5c6e35b0c81caad7d9e3984 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:10 +0100 Subject: perf annotate: Add basic support for bpf_image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the DSO_BINARY_TYPE__BPF_IMAGE dso binary type to recognize BPF images that carry trampoline or dispatcher. Upcoming patches will add support to read the image data, store it within the BPF feature in perf.data and display it for annotation purposes. Currently we only display following message: # ./perf annotate bpf_trampoline_24456 --stdio Percent | Source code & Disassembly of . for cycles (504 ... --------------------------------------------------------------- ... : to be implemented Signed-off-by: Jiri Olsa Acked-by: Song Liu Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Björn Töpel Cc: Daniel Borkmann Cc: David S. Miller Cc: Jakub Kicinski Cc: Jesper Dangaard Brouer Cc: John Fastabend Cc: Martin KaFai Lau Cc: Yonghong Song Link: https://lore.kernel.org/bpf/20200312195610.346362-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 20 ++++++++++++++++++++ tools/perf/util/dso.c | 1 + tools/perf/util/dso.h | 1 + tools/perf/util/machine.c | 11 +++++++++++ tools/perf/util/symbol.c | 1 + 5 files changed, 34 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f1ea0d61eb5b..9760d58e979a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1821,6 +1821,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, } #endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +static int +symbol__disassemble_bpf_image(struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *dl; + + args->offset = -1; + args->line = strdup("to be implemented"); + args->line_nr = 0; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + free(args->line); + return 0; +} + /* * Possibly create a new version of line with tabs expanded. Returns the * existing or new line, storage is updated if a new line is allocated. If @@ -1920,6 +1938,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) { return symbol__disassemble_bpf(sym, args); + } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) { + return symbol__disassemble_bpf_image(sym, args); } else if (dso__is_kcore(dso)) { kce.kcore_filename = symfs_filename; kce.addr = map__rip_2objdump(map, sym->start); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 91f21239608b..f338990e0fe6 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -191,6 +191,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; break; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2db64b79617a..9553a1fd9e8a 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -40,6 +40,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__GUEST_KCORE, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__BPF_PROG_INFO, + DSO_BINARY_TYPE__BPF_IMAGE, DSO_BINARY_TYPE__NOT_FOUND, }; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 06aa4e4db63d..09845eae9c03 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -736,6 +736,12 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } +static int is_bpf_image(const char *name) +{ + return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) || + strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1); +} + static int machine__process_ksymbol_register(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) @@ -760,6 +766,11 @@ static int machine__process_ksymbol_register(struct machine *machine, map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); dso__set_loaded(dso); + + if (is_bpf_image(event->ksymbol.name)) { + dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE; + dso__set_long_name(dso, "", false); + } } sym = symbol__new(map->map_ip(map, map->start), diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 26bc6a0096ce..8f4300492dc7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1544,6 +1544,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, return true; case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; -- cgit v1.2.3-59-g8ed1b From 980737282232b752bb14dab96d77665c15889c36 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Thu, 2 Apr 2020 11:45:31 +0300 Subject: capabilities: Introduce CAP_PERFMON to kernel and user space Introduce the CAP_PERFMON capability designed to secure system performance monitoring and observability operations so that CAP_PERFMON can assist CAP_SYS_ADMIN capability in its governing role for performance monitoring and observability subsystems. CAP_PERFMON hardens system security and integrity during performance monitoring and observability operations by decreasing attack surface that is available to a CAP_SYS_ADMIN privileged process [2]. Providing the access to system performance monitoring and observability operations under CAP_PERFMON capability singly, without the rest of CAP_SYS_ADMIN credentials, excludes chances to misuse the credentials and makes the operation more secure. Thus, CAP_PERFMON implements the principle of least privilege for performance monitoring and observability operations (POSIX IEEE 1003.1e: 2.2.2.39 principle of least privilege: A security design principle that states that a process or program be granted only those privileges (e.g., capabilities) necessary to accomplish its legitimate function, and only for the time that such privileges are actually required) CAP_PERFMON meets the demand to secure system performance monitoring and observability operations for adoption in security sensitive, restricted, multiuser production environments (e.g. HPC clusters, cloud and virtual compute environments), where root or CAP_SYS_ADMIN credentials are not available to mass users of a system, and securely unblocks applicability and scalability of system performance monitoring and observability operations beyond root and CAP_SYS_ADMIN use cases. CAP_PERFMON takes over CAP_SYS_ADMIN credentials related to system performance monitoring and observability operations and balances amount of CAP_SYS_ADMIN credentials following the recommendations in the capabilities man page [1] for CAP_SYS_ADMIN: "Note: this capability is overloaded; see Notes to kernel developers, below." For backward compatibility reasons access to system performance monitoring and observability subsystems of the kernel remains open for CAP_SYS_ADMIN privileged processes but CAP_SYS_ADMIN capability usage for secure system performance monitoring and observability operations is discouraged with respect to the designed CAP_PERFMON capability. Although the software running under CAP_PERFMON can not ensure avoidance of related hardware issues, the software can still mitigate these issues following the official hardware issues mitigation procedure [2]. The bugs in the software itself can be fixed following the standard kernel development process [3] to maintain and harden security of system performance monitoring and observability operations. [1] http://man7.org/linux/man-pages/man7/capabilities.7.html [2] https://www.kernel.org/doc/html/latest/process/embargoed-hardware-issues.html [3] https://www.kernel.org/doc/html/latest/admin-guide/security-bugs.html Signed-off-by: Alexey Budankov Acked-by: James Morris Acked-by: Serge E. Hallyn Acked-by: Song Liu Acked-by: Stephen Smalley Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Igor Lubashev Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Cc: linux-doc@vger.kernel.org Cc: linux-man@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: selinux@vger.kernel.org Link: http://lore.kernel.org/lkml/5590d543-82c6-490a-6544-08e6a5517db0@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/capability.h | 4 ++++ include/uapi/linux/capability.h | 8 +++++++- security/selinux/include/classmap.h | 4 ++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/capability.h b/include/linux/capability.h index ecce0f43c73a..027d7e4a853b 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -251,6 +251,10 @@ extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); +static inline bool perfmon_capable(void) +{ + return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN); +} /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 272dc69fa080..e58c9636741b 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -367,8 +367,14 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_READ 37 +/* + * Allow system performance and observability privileged operations + * using perf_events, i915_perf and other kernel subsystems + */ + +#define CAP_PERFMON 38 -#define CAP_LAST_CAP CAP_AUDIT_READ +#define CAP_LAST_CAP CAP_PERFMON #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 986f3ac14282..d233ab3f1533 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -27,9 +27,9 @@ "audit_control", "setfcap" #define COMMON_CAP2_PERMS "mac_override", "mac_admin", "syslog", \ - "wake_alarm", "block_suspend", "audit_read" + "wake_alarm", "block_suspend", "audit_read", "perfmon" -#if CAP_LAST_CAP > CAP_AUDIT_READ +#if CAP_LAST_CAP > CAP_PERFMON #error New capability defined, please update COMMON_CAP2_PERMS. #endif -- cgit v1.2.3-59-g8ed1b