From aeafd623f866c429307e3a4a39998f5f06b4f00e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Dec 2016 14:18:51 +0100 Subject: perf tools: Move headers check into bash script To make it nicer and easily maintainable. Also moving the check into fixdep sub make, so its output is not scattered around the build output. Removing extra $$ from mman*.h checks. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481030331-31944-5-git-send-email-jolsa@kernel.org [ Use /bin/sh, and 'function check() {' -> 'check () {' to make it work with busybox, in Alpine Linux, for instance ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 94 +-------------------------------------------- tools/perf/check-headers.sh | 59 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 93 deletions(-) create mode 100755 tools/perf/check-headers.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8f1c258b151a..e9ec531131ca 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -201,6 +201,7 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS)) $(goals) all: sub-make sub-make: fixdep + @./check-headers.sh $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals) else # force_fixdep @@ -404,99 +405,6 @@ export JEVENTS build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): prepare FORCE - @(test -f ../../include/uapi/linux/perf_event.h && ( \ - (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/hash.h && ( \ - (diff -B ../include/linux/hash.h ../../include/linux/hash.h >/dev/null) \ - || echo "Warning: tools/include/linux/hash.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/hw_breakpoint.h && ( \ - (diff -B ../include/uapi/linux/hw_breakpoint.h ../../include/uapi/linux/hw_breakpoint.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/hw_breakpoint.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/disabled-features.h && ( \ - (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/required-features.h && ( \ - (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \ - (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memset_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/perf_regs.h ../../arch/arm64/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/perf_regs.h ../../arch/powerpc/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/perf_regs.h ../../arch/x86/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm.h ../../arch/x86/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm_perf.h ../../arch/x86/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/svm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/svm.h ../../arch/x86/include/uapi/asm/svm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/svm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/vmx.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/vmx.h ../../arch/x86/include/uapi/asm/vmx.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/vmx.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/kvm.h ../../arch/powerpc/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm.h ../../arch/s390/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm_perf.h ../../arch/s390/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/sie.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/sie.h ../../arch/s390/include/uapi/asm/sie.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/kvm.h ../../arch/arm/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/kvm.h ../../arch/arm64/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/arch_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/arch_hweight.h ../../include/asm-generic/bitops/arch_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/arch_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/const_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/const_hweight.h ../../include/asm-generic/bitops/const_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/const_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/__fls.h && ( \ - (diff -B ../include/asm-generic/bitops/__fls.h ../../include/asm-generic/bitops/__fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/__fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls.h && ( \ - (diff -B ../include/asm-generic/bitops/fls.h ../../include/asm-generic/bitops/fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls64.h && ( \ - (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/coresight-pmu.h && ( \ - (diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \ - || echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman-common.h && ( \ - (diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(JEVENTS_IN): FORCE diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh new file mode 100755 index 000000000000..c747bfd7f14d --- /dev/null +++ b/tools/perf/check-headers.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +HEADERS=' +include/uapi/linux/perf_event.h +include/linux/hash.h +include/uapi/linux/hw_breakpoint.h +arch/x86/include/asm/disabled-features.h +arch/x86/include/asm/required-features.h +arch/x86/include/asm/cpufeatures.h +arch/arm/include/uapi/asm/perf_regs.h +arch/arm64/include/uapi/asm/perf_regs.h +arch/powerpc/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/kvm.h +arch/x86/include/uapi/asm/kvm_perf.h +arch/x86/include/uapi/asm/svm.h +arch/x86/include/uapi/asm/vmx.h +arch/powerpc/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm_perf.h +arch/s390/include/uapi/asm/sie.h +arch/arm/include/uapi/asm/kvm.h +arch/arm64/include/uapi/asm/kvm.h +include/asm-generic/bitops/arch_hweight.h +include/asm-generic/bitops/const_hweight.h +include/asm-generic/bitops/__fls.h +include/asm-generic/bitops/fls.h +include/asm-generic/bitops/fls64.h +include/linux/coresight-pmu.h +include/uapi/asm-generic/mman-common.h +' + +check () { + file=$1 + opts= + + shift + while [ -n "$*" ]; do + opts="$opts \"$1\"" + shift + done + + cmd="diff $opts ../$file ../../$file > /dev/null" + + test -f ../../$file && + eval $cmd || echo "Warning: $file differs from kernel" >&2 +} + + +# simple diff check +for i in $HEADERS; do + check $i -B +done + +# diff with extra ignore lines +check arch/x86/lib/memcpy_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " +check arch/x86/lib/memset_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " +check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" +check include/uapi/linux/mman.h -B -I "^#include <\(uapi/\)*asm/mman.h>" -- cgit v1.2.3-59-g8ed1b From 96039c7c52e03b7d6dd773664e74b79e3ae9856a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:50 +0900 Subject: perf sched timehist: Split is_idle_sample() The is_idle_sample() function actually does more than determining whether sample come from idle task. Split the callchain part into save_task_callchain() to make it clearer. Also checking prev_pid from trace data looks preferred than just checking sample->pid since it's possible, although rare, to have invalid 0 pid/tid on scheduling an exiting task. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-2-namhyung@kernel.org [ Remove some needless () in some return statements ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 1a3f1be93372..966eddce1609 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1939,39 +1939,40 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->total_run_time += r->dt_run; } -static bool is_idle_sample(struct perf_sched *sched, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct machine *machine) +static bool is_idle_sample(struct perf_sample *sample, + struct perf_evsel *evsel) { - struct thread *thread; - struct callchain_cursor *cursor = &callchain_cursor; - /* pid 0 == swapper == idle task */ - if (sample->pid == 0) - return true; + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) + return perf_evsel__intval(evsel, sample, "prev_pid") == 0; - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { - if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) - return true; - } + return sample->pid == 0; +} + +static void save_task_callchain(struct perf_sched *sched, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct callchain_cursor *cursor = &callchain_cursor; + struct thread *thread; /* want main thread for process - has maps */ thread = machine__findnew_thread(machine, sample->pid, sample->pid); if (thread == NULL) { pr_debug("Failed to get thread for pid %d.\n", sample->pid); - return false; + return; } if (!symbol_conf.use_callchain || sample->callchain == NULL) - return false; + return; if (thread__resolve_callchain(thread, cursor, evsel, sample, NULL, NULL, sched->max_stack + 2) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); - return false; + return; } callchain_cursor_commit(cursor); @@ -1994,8 +1995,6 @@ static bool is_idle_sample(struct perf_sched *sched, callchain_cursor_advance(cursor); } - - return false; } /* @@ -2111,7 +2110,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, { struct thread *thread; - if (is_idle_sample(sched, sample, evsel, machine)) { + if (is_idle_sample(sample, evsel)) { thread = get_idle_thread(sample->cpu); if (thread == NULL) pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); @@ -2124,6 +2123,8 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, pr_debug("Failed to get thread for tid %d. skipping sample.\n", sample->tid); } + + save_task_callchain(sched, sample, evsel, machine); } return thread; -- cgit v1.2.3-59-g8ed1b From 3bc2fa9cb829ccf6527e7117d9af769d93ee6d39 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:51 +0900 Subject: perf sched timehist: Introduce struct idle_time_data The struct idle_time_data is to keep idle stats with callchains entering to the idle task. The normal thread_runtime calculation is done transparently since it extends the struct thread_runtime. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-3-namhyung@kernel.org [ Align struct field names ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 966eddce1609..e108b0f6a246 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -230,6 +230,15 @@ struct evsel_runtime { u32 ncpu; /* highest cpu slot allocated */ }; +/* per cpu idle time data */ +struct idle_thread_runtime { + struct thread_runtime tr; + struct thread *last_thread; + struct rb_root sorted_root; + struct callchain_root callchain; + struct callchain_cursor cursor; +}; + /* track idle times per cpu */ static struct thread **idle_threads; static int idle_max_cpu; @@ -1997,13 +2006,31 @@ static void save_task_callchain(struct perf_sched *sched, } } +static int init_idle_thread(struct thread *thread) +{ + struct idle_thread_runtime *itr; + + thread__set_comm(thread, idle_comm, 0); + + itr = zalloc(sizeof(*itr)); + if (itr == NULL) + return -ENOMEM; + + init_stats(&itr->tr.run_stats); + callchain_init(&itr->callchain); + callchain_cursor_reset(&itr->cursor); + thread__set_priv(thread, itr); + + return 0; +} + /* * Track idle stats per cpu by maintaining a local thread * struct for the idle task on each cpu. */ static int init_idle_threads(int ncpu) { - int i; + int i, ret; idle_threads = zalloc(ncpu * sizeof(struct thread *)); if (!idle_threads) @@ -2017,7 +2044,9 @@ static int init_idle_threads(int ncpu) if (idle_threads[i] == NULL) return -ENOMEM; - thread__set_comm(idle_threads[i], idle_comm, 0); + ret = init_idle_thread(idle_threads[i]); + if (ret < 0) + return ret; } return 0; @@ -2064,8 +2093,8 @@ static struct thread *get_idle_thread(int cpu) idle_threads[cpu] = thread__new(0, 0); if (idle_threads[cpu]) { - idle_threads[cpu]->tid = 0; - thread__set_comm(idle_threads[cpu], idle_comm, 0); + if (init_idle_thread(idle_threads[cpu]) < 0) + return NULL; } } -- cgit v1.2.3-59-g8ed1b From 699b5b920db04a6ff5c03a519e4c182aeb350952 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:52 +0900 Subject: perf sched timehist: Save callchain when entering idle In order to investigate the idleness reason, it is necessary to keep the callchains when entering idle. This can be identified by the sched:sched_switch event having the next_pid field as 0. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-4-namhyung@kernel.org Link: http://lkml.kernel.org/r/20161213080632.19099-1-namhyung@kernel.org [ Merged fix from Namhyung, see second Link: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e108b0f6a246..dc83b803ca54 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -200,6 +200,7 @@ struct perf_sched { /* options for timehist command */ bool summary; bool summary_only; + bool idle_hist; bool show_callchain; unsigned int max_stack; bool show_cpu_visual; @@ -2101,6 +2102,15 @@ static struct thread *get_idle_thread(int cpu) return idle_threads[cpu]; } +static void save_idle_callchain(struct idle_thread_runtime *itr, + struct perf_sample *sample) +{ + if (!symbol_conf.use_callchain || sample->callchain == NULL) + return; + + callchain_cursor__copy(&itr->cursor, &callchain_cursor); +} + /* * handle runtime stats saved per thread */ @@ -2154,6 +2164,26 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, } save_task_callchain(sched, sample, evsel, machine); + if (sched->idle_hist) { + struct thread *idle; + struct idle_thread_runtime *itr; + + idle = get_idle_thread(sample->cpu); + if (idle == NULL) { + pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); + return NULL; + } + + itr = thread__priv(idle); + if (itr == NULL) + return NULL; + + itr->last_thread = thread; + + /* copy task callchain when entering to idle */ + if (perf_evsel__intval(evsel, sample, "next_pid") == 0) + save_idle_callchain(itr, sample); + } } return thread; -- cgit v1.2.3-59-g8ed1b From a4b2b6f56e0cfe729cf89318d44b6a875b31d95a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:53 +0900 Subject: perf sched timehist: Skip non-idle events when necessary Sometimes it only focuses on idle-related events like upcoming idle-hist feature. In this case we don't want to see other event to reduce noise. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index dc83b803ca54..c8e7848e71a7 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2190,7 +2190,9 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, } static bool timehist_skip_sample(struct perf_sched *sched, - struct thread *thread) + struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample) { bool rc = false; @@ -2199,10 +2201,19 @@ static bool timehist_skip_sample(struct perf_sched *sched, sched->skipped_samples++; } + if (sched->idle_hist) { + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) + rc = true; + else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && + perf_evsel__intval(evsel, sample, "next_pid") != 0) + rc = true; + } + return rc; } static void timehist_print_wakeup_event(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine, struct thread *awakened) @@ -2215,8 +2226,8 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, return; /* show wakeup unless both awakee and awaker are filtered */ - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, awakened)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, awakened, evsel, sample)) { return; } @@ -2261,7 +2272,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool, /* show wakeups if requested */ if (sched->show_wakeups && !perf_time__skip_sample(&sched->ptime, sample->time)) - timehist_print_wakeup_event(sched, sample, machine, thread); + timehist_print_wakeup_event(sched, evsel, sample, machine, thread); return 0; } @@ -2288,8 +2299,8 @@ static void timehist_print_migration_event(struct perf_sched *sched, if (thread == NULL) return; - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, migrated)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, migrated, evsel, sample)) { return; } @@ -2374,7 +2385,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, goto out; } - if (timehist_skip_sample(sched, thread)) + if (timehist_skip_sample(sched, thread, evsel, sample)) goto out; tr = thread__get_runtime(thread); -- cgit v1.2.3-59-g8ed1b From 07235f84ece6b66f43334881806aad3467cf3d84 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:54 +0900 Subject: perf sched timehist: Add -I/--idle-hist option The --idle-hist option is to analyze system idle state so which process makes cpu to go idle. If this option is specified, non-idle events will be skipped and processes switching to/from idle will be shown. This option is mostly useful when used with --summary(-only) option. In the idle-time summary view, idle time is accounted to previous thread which is run before idle task. The example output looks like following: Idle-time summary comm parent sched-out idle-time min-idle avg-idle max-idle stddev migrations (count) (msec) (msec) (msec) (msec) % -------------------------------------------------------------------------------------------- rcu_preempt[7] 2 95 550.872 0.011 5.798 23.146 7.63 0 migration/1[16] 2 1 15.558 15.558 15.558 15.558 0.00 0 khugepaged[39] 2 1 3.062 3.062 3.062 3.062 0.00 0 kworker/0:1H[124] 2 2 4.728 0.611 2.364 4.116 74.12 0 systemd-journal[167] 1 1 4.510 4.510 4.510 4.510 0.00 0 kworker/u16:3[558] 2 13 74.737 0.080 5.749 12.960 21.96 0 irq/34-iwlwifi[628] 2 21 118.403 0.032 5.638 23.990 24.00 0 kworker/u17:0[673] 2 1 3.523 3.523 3.523 3.523 0.00 0 dbus-daemon[722] 1 1 6.743 6.743 6.743 6.743 0.00 0 ifplugd[741] 1 1 58.826 58.826 58.826 58.826 0.00 0 wpa_supplicant[1490] 1 1 13.302 13.302 13.302 13.302 0.00 0 wpa_actiond[1492] 1 2 4.064 0.168 2.032 3.896 91.72 0 dockerd[1500] 1 1 0.055 0.055 0.055 0.055 0.00 0 ... Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-6-namhyung@kernel.org Link: http://lkml.kernel.org/r/20161213080632.19099-2-namhyung@kernel.org [ Merged fix sent by Namhyumg, as posted in the second Link: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 +++ tools/perf/builtin-sched.c | 46 +++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 7775b1eb2bee..76173969ab80 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' --migrations:: Show migration events. +-I:: +--idle-hist:: + Show idle-related events only. + --time:: Only analyze samples within given time window: ,. Times have the format seconds.microseconds. If start is not given (i.e., time diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c8e7848e71a7..0b14265432df 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2421,7 +2421,36 @@ static int timehist_sched_change_event(struct perf_tool *tool, t = ptime->end; } - timehist_update_runtime_stats(tr, t, tprev); + if (!sched->idle_hist || thread->tid == 0) { + timehist_update_runtime_stats(tr, t, tprev); + + if (sched->idle_hist) { + struct idle_thread_runtime *itr = (void *)tr; + struct thread_runtime *last_tr; + + BUG_ON(thread->tid != 0); + + if (itr->last_thread == NULL) + goto out; + + /* add current idle time as last thread's runtime */ + last_tr = thread__get_runtime(itr->last_thread); + if (last_tr == NULL) + goto out; + + timehist_update_runtime_stats(last_tr, t, tprev); + /* + * remove delta time of last thread as it's not updated + * and otherwise it will show an invalid value next + * time. we only care total run time and run stat. + */ + last_tr->dt_run = 0; + last_tr->dt_wait = 0; + last_tr->dt_delay = 0; + + itr->last_thread = NULL; + } + } if (!sched->summary_only) timehist_print_sample(sched, sample, &al, thread, t); @@ -2543,9 +2572,15 @@ static void timehist_print_summary(struct perf_sched *sched, if (comm_width < 30) comm_width = 30; - printf("\nRuntime summary\n"); - printf("%*s parent sched-in ", comm_width, "comm"); - printf(" run-time min-run avg-run max-run stddev migrations\n"); + if (sched->idle_hist) { + printf("\nIdle-time summary\n"); + printf("%*s parent sched-out ", comm_width, "comm"); + printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); + } else { + printf("\nRuntime summary\n"); + printf("%*s parent sched-in ", comm_width, "comm"); + printf(" run-time min-run avg-run max-run stddev migrations\n"); + } printf("%*s (count) ", comm_width, ""); printf(" (msec) (msec) (msec) (msec) %%\n"); printf("%.117s\n", graph_dotted_line); @@ -2561,7 +2596,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf("\n"); /* CPU idle stats not tracked when samples were skipped */ - if (sched->skipped_samples) + if (sched->skipped_samples && !sched->idle_hist) return; printf("\nIdle stats:\n"); @@ -3107,6 +3142,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), + OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), OPT_STRING(0, "time", &sched.time_str, "str", "Time span for analysis (start,stop)"), OPT_PARENT(sched_options) -- cgit v1.2.3-59-g8ed1b From ba957ebb54893acaf3dc846031073a63f021cee1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:55 +0900 Subject: perf sched timehist: Show callchains for idle stat When --idle-hist option is used with --summary, it now shows idle stats with callchains like below: Idle stats by callchain: CPU 0: 902.195 msec Idle time (msec) Count Callchains ---------------- ------- -------------------------------------------------- 370.589 69 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath 178.799 17 worker_thread <- kthread <- ret_from_fork 128.352 17 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork 125.111 19 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select 71.599 50 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll 23.146 1 rcu_gp_kthread <- kthread <- ret_from_fork 4.510 1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64 0.085 1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- do_restart_poll ... Committer notes: Extra testing: # uname -a Linux jouet 4.8.8-300.fc25.x86_64 #1 SMP Tue Nov 15 18:10:06 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux 1) Run 'perf sched record -g' 2) Run 'perf sched timehist --idle --summary' Idle stats by callchain: CPU 0: 13456.840 msec Idle time (msec) Count Callchains ---------------- ----- -------------------------------------------------- 5386.637 3283 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll 2750.238 2299 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- do_syscall_64 1275.672 1287 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- entry_SYSCALL_64_fastpath 936.322 452 worker_thread <- kthread <- ret_from_fork 741.311 385 rcu_nocb_kthread <- kthread <- ret_from_fork 729.385 248 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_ppoll 365.386 229 irq_thread <- kthread <- ret_from_fork 338.934 265 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath 219.488 201 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork 186.839 410 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64 142.541 59 kvm_vcpu_block <- kvm_arch_vcpu_ioctl_run <- kvm_vcpu_ioctl <- do_vfs_ioctl <- sys_ioctl 83.887 92 smpboot_thread_fn <- kthread <- ret_from_fork 62.722 96 do_exit <- do_group_exit <- 0x2a5594 <- entry_SYSCALL_64_fastpath 47.894 83 pipe_wait <- pipe_read <- __vfs_read <- vfs_read <- sys_read 46.554 61 rcu_gp_kthread <- kthread <- ret_from_fork 34.337 21 schedule_timeout <- intel_fbc_work_fn <- process_one_work <- worker_thread <- kthread 29.521 14 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select 20.274 10 schedule_timeout <- io_schedule_timeout <- bit_wait_io <- __wait_on_bit <- out_of_line_wait_on_bit 15.085 55 schedule_timeout <- unix_stream_read_generic <- unix_stream_recvmsg <- sock_recvmsg <- SYSC_recvfrom Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0b14265432df..c1c07bfe132c 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2448,6 +2448,9 @@ static int timehist_sched_change_event(struct perf_tool *tool, last_tr->dt_wait = 0; last_tr->dt_delay = 0; + if (itr->cursor.nr) + callchain_append(&itr->callchain, &itr->cursor, t - tprev); + itr->last_thread = NULL; } } @@ -2557,6 +2560,60 @@ static int show_deadthread_runtime(struct thread *t, void *priv) return __show_thread_runtime(t, priv); } +static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) +{ + const char *sep = " <- "; + struct callchain_list *chain; + size_t ret = 0; + char bf[1024]; + bool first; + + if (node == NULL) + return 0; + + ret = callchain__fprintf_folded(fp, node->parent); + first = (ret == 0); + + list_for_each_entry(chain, &node->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->ms.sym && chain->ms.sym->ignore) + continue; + ret += fprintf(fp, "%s%s", first ? "" : sep, + callchain_list__sym_name(chain, bf, sizeof(bf), + false)); + first = false; + } + + return ret; +} + +static size_t timehist_print_idlehist_callchain(struct rb_root *root) +{ + size_t ret = 0; + FILE *fp = stdout; + struct callchain_node *chain; + struct rb_node *rb_node = rb_first(root); + + printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); + printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, + graph_dotted_line); + + while (rb_node) { + chain = rb_entry(rb_node, struct callchain_node, rb_node); + rb_node = rb_next(rb_node); + + ret += fprintf(fp, " "); + print_sched_time(chain->hit, 12); + ret += 16; /* print_sched_time returns 2nd arg + 4 */ + ret += fprintf(fp, " %8d ", chain->count); + ret += callchain__fprintf_folded(fp, chain); + ret += fprintf(fp, "\n"); + } + + return ret; +} + static void timehist_print_summary(struct perf_sched *sched, struct perf_session *session) { @@ -2615,6 +2672,35 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" CPU %2d idle entire time window\n", i); } + if (sched->idle_hist && symbol_conf.use_callchain) { + callchain_param.mode = CHAIN_FOLDED; + callchain_param.value = CCVAL_PERIOD; + + callchain_register_param(&callchain_param); + + printf("\nIdle stats by callchain:\n"); + for (i = 0; i < idle_max_cpu; ++i) { + struct idle_thread_runtime *itr; + + t = idle_threads[i]; + if (!t) + continue; + + itr = thread__priv(t); + if (itr == NULL) + continue; + + callchain_param.sort(&itr->sorted_root, &itr->callchain, + 0, &callchain_param); + + printf(" CPU %2d:", i); + print_sched_time(itr->tr.total_run_time, 6); + printf(" msec\n"); + timehist_print_idlehist_callchain(&itr->sorted_root); + printf("\n"); + } + } + printf("\n" " Total number of unique tasks: %" PRIu64 "\n" "Total number of context switches: %" PRIu64 "\n" -- cgit v1.2.3-59-g8ed1b From 7e6a79981b7a797b37b1dbeca3fd6ae1cb6d881f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Dec 2016 10:52:10 -0300 Subject: perf tools: Remove some needless __maybe_unused I.e. those parameters/functions _are_ used, so ditch that misleading attribute. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-13cqtjh0yojg5gzvpq1zzpl0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 13 ++++++------- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 6 +++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4b419631753d..f8ca7a4ebabc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -208,7 +208,7 @@ static void compute_stats(struct c2c_hist_entry *c2c_he, static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, struct machine *machine) { struct c2c_hists *c2c_hists = &c2c.hists; @@ -379,7 +379,7 @@ static int symbol_width(struct hists *hists, struct sort_entry *se) static int c2c_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct hists *hists __maybe_unused) + struct hists *hists) { struct c2c_fmt *c2c_fmt; struct c2c_dimension *dim; @@ -1127,7 +1127,7 @@ MEAN_ENTRY(mean_lcl_entry, lcl_hitm); MEAN_ENTRY(mean_load_entry, load); static int -cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1141,7 +1141,7 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1155,7 +1155,7 @@ cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { int width = c2c_width(fmt, hpp, he->hists); @@ -1779,7 +1779,6 @@ static int c2c_hists__init(struct c2c_hists *hists, return hpp_list__parse(&hists->list, NULL, sort); } -__maybe_unused static int c2c_hists__reinit(struct c2c_hists *c2c_hists, const char *output, const char *sort) @@ -2658,7 +2657,7 @@ out: return err; } -static int parse_record_events(const struct option *opt __maybe_unused, +static int parse_record_events(const struct option *opt, const char *str, int unset __maybe_unused) { bool *event_set = (bool *) opt->value; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d2afbe4a240d..06cc759a4597 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -648,7 +648,7 @@ report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, } static int -parse_branch_mode(const struct option *opt __maybe_unused, +parse_branch_mode(const struct option *opt, const char *str __maybe_unused, int unset) { int *branch_mode = opt->value; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 688dea7cb08f..a02f2e965628 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2195,7 +2195,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, } static -int process_stat_config_event(struct perf_tool *tool __maybe_unused, +int process_stat_config_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2238,7 +2238,7 @@ static int set_maps(struct perf_stat *st) } static -int process_thread_map_event(struct perf_tool *tool __maybe_unused, +int process_thread_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2257,7 +2257,7 @@ int process_thread_map_event(struct perf_tool *tool __maybe_unused, } static -int process_cpu_map_event(struct perf_tool *tool __maybe_unused, +int process_cpu_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { -- cgit v1.2.3-59-g8ed1b From 631ac41b46d293fb3ee43a809776c1663de8d9c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:39 +0100 Subject: perf mem: Fix --all-user/--all-kernel options Removing extra '--' prefix. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: ad16511b0e40 ("perf mem: Add -U/-K (--all-user/--all-kernel) options") Link: http://lkml.kernel.org/r/1481538943-21874-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d1ce29be560e..cd7bc4d104e2 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), - OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), + OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; -- cgit v1.2.3-59-g8ed1b From 83c2e4f3968d6871eed295f2f5675d3d70b01afa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:40 +0100 Subject: perf evsel: Use variable instead of repeating lengthy FD macro It's more readable and will ease up following patches. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b2365a63db45..fd61ebd77c26 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1474,7 +1474,7 @@ retry_sample_id: for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < nthreads; thread++) { - int group_fd; + int fd, group_fd; if (!evsel->cgrp && !evsel->system_wide) pid = thread_map__pid(threads, thread); @@ -1484,21 +1484,22 @@ retry_open: pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpus->map[cpu], group_fd, flags); - FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, - pid, - cpus->map[cpu], - group_fd, flags); - if (FD(evsel, cpu, thread) < 0) { + fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], + group_fd, flags); + + FD(evsel, cpu, thread) = fd; + + if (fd < 0) { err = -errno; pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; } - pr_debug2(" = %d\n", FD(evsel, cpu, thread)); + pr_debug2(" = %d\n", fd); if (evsel->bpf_fd >= 0) { - int evt_fd = FD(evsel, cpu, thread); + int evt_fd = fd; int bpf_fd = evsel->bpf_fd; err = ioctl(evt_fd, -- cgit v1.2.3-59-g8ed1b From 38af91f01de0e160c17ae380acb5bab5d51066f4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:41 +0100 Subject: perf thread_map: Add thread_map__remove function Add thread_map__remove function to remove thread from thread map. Add automated test also. Committer notes: Testing it: # perf test "Remove thread map" 39: Remove thread map : Ok # perf test -v "Remove thread map" 39: Remove thread map : --- start --- test child forked, pid 4483 2 threads: 4482, 4483 1 thread: 4483 0 thread: test child finished with 0 ---- end ---- Remove thread map: Ok # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-4-git-send-email-jolsa@kernel.org [ Added stdlib.h, to get the free() declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 4 ++++ tools/perf/tests/tests.h | 1 + tools/perf/tests/thread-map.c | 44 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/thread_map.c | 22 +++++++++++++++++++++ tools/perf/util/thread_map.h | 1 + 5 files changed, 72 insertions(+) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 23605202d4a1..a77dcc0d24e3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -185,6 +185,10 @@ static struct test generic_tests[] = { .desc = "Synthesize thread map", .func = test__thread_map_synthesize, }, + { + .desc = "Remove thread map", + .func = test__thread_map_remove, + }, { .desc = "Synthesize cpu map", .func = test__cpu_map_synthesize, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 0d7b251305af..a512f0c8ff5b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -80,6 +80,7 @@ const char *test__bpf_subtest_get_desc(int subtest); int test__bpf_subtest_get_nr(void); int test_session_topology(int subtest); int test__thread_map_synthesize(int subtest); +int test__thread_map_remove(int subtest); int test__cpu_map_synthesize(int subtest); int test__synthesize_stat_config(int subtest); int test__synthesize_stat(int subtest); diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index cee2a2cdc933..a4a4b4625ac3 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -93,3 +94,46 @@ int test__thread_map_synthesize(int subtest __maybe_unused) return 0; } + +int test__thread_map_remove(int subtest __maybe_unused) +{ + struct thread_map *threads; + char *str; + int i; + + TEST_ASSERT_VAL("failed to allocate map string", + asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); + + threads = thread_map__new_str(str, NULL, 0); + + TEST_ASSERT_VAL("failed to allocate thread_map", + threads); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to not remove thread", + thread_map__remove(threads, 0)); + + for (i = 0; i < threads->nr; i++) + free(threads->map[i].comm); + + free(threads); + return 0; +} diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 40585f5b7027..f9eab200fd75 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -448,3 +448,25 @@ bool thread_map__has(struct thread_map *threads, pid_t pid) return false; } + +int thread_map__remove(struct thread_map *threads, int idx) +{ + int i; + + if (threads->nr < 1) + return -EINVAL; + + if (idx >= threads->nr) + return -EINVAL; + + /* + * Free the 'idx' item and shift the rest up. + */ + free(threads->map[idx].comm); + + for (i = idx; i < threads->nr - 1; i++) + threads->map[i] = threads->map[i + 1]; + + threads->nr--; + return 0; +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index bd3b971588da..ea0ef08c6303 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -58,4 +58,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread) void thread_map__read_comms(struct thread_map *threads); bool thread_map__has(struct thread_map *threads, pid_t pid); +int thread_map__remove(struct thread_map *threads, int idx); #endif /* __PERF_THREAD_MAP_H */ -- cgit v1.2.3-59-g8ed1b From a359c17a7e1a9c99384499cf7b43d80867080789 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Dec 2016 08:46:22 +0100 Subject: perf evsel: Allow to ignore missing pid Adding perf_evsel::ignore_missing_cpu_thread bool. When set true, it allows perf to ignore error of missing pid of perf event syscall. We remove missing thread id from the thread_map, so the rest of the processing like ioctl and mmap won't get disturbed with -1 fd. The reason for supporting this is to ease up monitoring group of pids, that 'disappear' before perf opens their event. This currently leads perf to report error and exit and makes perf record's -u option unusable under certain setup. With this change we will allow this race and ignore such failure with following warning: WARNING: Ignored open failure for pid 8605 Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161213074622.GA3084@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 1 + tools/perf/util/evsel.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 3 files changed, 46 insertions(+) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 9a0236a4cf95..1c27d947c2fe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -55,6 +55,7 @@ struct record_opts { bool all_user; bool tail_synthesize; bool overwrite; + bool ignore_missing_thread; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fd61ebd77c26..04e536ae4d88 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -990,6 +990,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * it overloads any global configuration. */ apply_config_terms(evsel, opts); + + evsel->ignore_missing_thread = opts->ignore_missing_thread; } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -1419,6 +1421,33 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static bool ignore_missing_thread(struct perf_evsel *evsel, + struct thread_map *threads, + int thread, int err) +{ + if (!evsel->ignore_missing_thread) + return false; + + /* The system wide setup does not work with threads. */ + if (evsel->system_wide) + return false; + + /* The -ESRCH is perf event syscall errno for pid's not found. */ + if (err != -ESRCH) + return false; + + /* If there's only one thread, let it fail. */ + if (threads->nr == 1) + return false; + + if (thread_map__remove(threads, thread)) + return false; + + pr_warning("WARNING: Ignored open failure for pid %d\n", + thread_map__pid(threads, thread)); + return true; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1491,6 +1520,21 @@ retry_open: if (fd < 0) { err = -errno; + + if (ignore_missing_thread(evsel, threads, thread, err)) { + /* + * We just removed 1 thread, so take a step + * back on thread index and lower the upper + * nthreads limit. + */ + nthreads--; + thread--; + + /* ... and pretend like nothing have happened. */ + err = 0; + continue; + } + pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6abb89cd27f9..06ef6f29efa1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -120,6 +120,7 @@ struct perf_evsel { bool tracking; bool per_pkg; bool precise_max; + bool ignore_missing_thread; /* parse modifier helper */ int exclude_GH; int nr_members; -- cgit v1.2.3-59-g8ed1b From 23dc4f1586159470aac707caae526824dd077e25 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:43 +0100 Subject: perf record: Force ignore_missing_thread for uid option Enable perf_evsel::ignore_missing_thread for -u option to ignore complete failure if any of the user's processes die between its enumeration and time we open the event. Committer notes: While doing a 'make -j4 allmodconfig' we sometimes get into the race: Before: # perf record -u acme Error: The sys_perf_event_open() syscall returned with 3 (No such process) for event (cycles:ppp). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? # After: [root@jouet ~]# perf record -u acme WARNING: Ignored open failure for pid 9888 WARNING: Ignored open failure for pid 18059 [root@jouet ~]# Which is an improvement, with the races not preventing the remaining threads for the specified user from being monitored, but the message probably needs further clarification. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fa26865364b6..74d6a035133a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1687,6 +1687,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } + /* Enable ignoring missing threads when -u option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); -- cgit v1.2.3-59-g8ed1b From 3ee2eb6da20db1edad31070da38996e8e0f8adfa Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 5 Dec 2016 21:26:46 +0530 Subject: perf annotate: Support jump instruction with target as second operand Architectures like PowerPC have jump instructions that includes a target address as a second operand. For example, 'bne cr7,0xc0000000000f6154'. Add support for such instruction in perf annotate. objdump o/p: c0000000000f6140: ld r9,1032(r31) c0000000000f6144: cmpdi cr7,r9,0 c0000000000f6148: bne cr7,0xc0000000000f6154 c0000000000f614c: ld r9,2312(r30) c0000000000f6150: std r9,1032(r31) c0000000000f6154: ld r9,88(r31) Corresponding perf annotate o/p: Before patch: ld r9,1032(r31) cmpdi cr7,r9,0 v bne 3ffffffffff09f2c ld r9,2312(r30) std r9,1032(r31) 74: ld r9,88(r31) After patch: ld r9,1032(r31) cmpdi cr7,r9,0 v bne 74 ld r9,2312(r30) std r9,1032(r31) 74: ld r9,88(r31) Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Chris Riyder Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Taeung Song Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1480953407-7605-2-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ea7e0de4b9c1..590244e5781e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -223,8 +223,12 @@ bool ins__is_call(const struct ins *ins) static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { const char *s = strchr(ops->raw, '+'); + const char *c = strchr(ops->raw, ','); - ops->target.addr = strtoull(ops->raw, NULL, 16); + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + else + ops->target.addr = strtoull(ops->raw, NULL, 16); if (s++ != NULL) ops->target.offset = strtoull(s, NULL, 16); -- cgit v1.2.3-59-g8ed1b From e216874cc1946d28084fa90e495e02725a29e25f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 5 Dec 2016 21:26:47 +0530 Subject: perf annotate: Fix jump target outside of function address range If jump target is outside of function range, perf is not handling it correctly. Especially when target address is lesser than function start address, target offset will be negative. But, target address declared to be unsigned, converts negative number into 2's complement. See below example. Here target of 'jumpq' instruction at 34cf8 is 34ac0 which is lesser than function start address(34cf0). 34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0 Objdump output: 0000000000034cf0 <__sigaction>: __GI___sigaction(): 34cf0: lea -0x20(%rdi),%eax 34cf3: cmp -bashx1,%eax 34cf6: jbe 34d00 <__sigaction+0x10> 34cf8: jmpq 34ac0 <__GI___libc_sigaction> 34cfd: nopl (%rax) 34d00: mov 0x386161(%rip),%rax # 3bae68 <_DYNAMIC+0x2e8> 34d07: movl -bashx16,%fs:(%rax) 34d0e: mov -bashxffffffff,%eax 34d13: retq perf annotate before applying patch: __GI___sigaction /usr/lib64/libc-2.22.so lea -0x20(%rdi),%eax cmp -bashx1,%eax v jbe 10 v jmpq fffffffffffffdd0 nop 10: mov _DYNAMIC+0x2e8,%rax movl -bashx16,%fs:(%rax) mov -bashxffffffff,%eax retq perf annotate after applying patch: __GI___sigaction /usr/lib64/libc-2.22.so lea -0x20(%rdi),%eax cmp -bashx1,%eax v jbe 10 ^ jmpq 34ac0 <__GI___libc_sigaction> nop 10: mov _DYNAMIC+0x2e8,%rax movl -bashx16,%fs:(%rax) mov -bashxffffffff,%eax retq Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Chris Riyder Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Taeung Song Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1480953407-7605-3-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 5 +++-- tools/perf/util/annotate.c | 14 +++++++++----- tools/perf/util/annotate.h | 5 +++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ec7a30fad149..ba36aac340bc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_color(browser, color); if (dl->ins.ops && dl->ins.ops->scnprintf) { if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > (u64)dl->offset; + bool fwd = dl->ops.target.offset > dl->offset; ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); @@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy { if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) || !disasm_line__has_offset(dl) - || dl->ops.target.offset >= symbol__size(sym)) + || dl->ops.target.offset < 0 + || dl->ops.target.offset >= (s64)symbol__size(sym)) return false; return true; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 590244e5781e..c81a3950a7fe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -230,10 +230,12 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op else ops->target.addr = strtoull(ops->raw, NULL, 16); - if (s++ != NULL) + if (s++ != NULL) { ops->target.offset = strtoull(s, NULL, 16); - else - ops->target.offset = UINT64_MAX; + ops->target.offset_avail = true; + } else { + ops->target.offset_avail = false; + } return 0; } @@ -241,7 +243,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - if (!ops->target.addr) + if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); @@ -1209,9 +1211,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, if (dl == NULL) return -1; - if (dl->ops.target.offset == UINT64_MAX) + if (!disasm_line__has_offset(dl)) { dl->ops.target.offset = dl->ops.target.addr - map__rip_2objdump(map, sym->start); + dl->ops.target.offset_avail = true; + } /* kcore has no symbols, so add the call target name */ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 87e4cadc5d27..09776b5af991 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -24,7 +24,8 @@ struct ins_operands { char *raw; char *name; u64 addr; - u64 offset; + s64 offset; + bool offset_avail; } target; union { struct { @@ -68,7 +69,7 @@ struct disasm_line { static inline bool disasm_line__has_offset(const struct disasm_line *dl) { - return dl->ops.target.offset != UINT64_MAX; + return dl->ops.target.offset_avail; } void disasm_line__free(struct disasm_line *dl); -- cgit v1.2.3-59-g8ed1b From 0cb34dc2a37290f4069c5b01735c9725dc0a1b5c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:14 -0800 Subject: tools lib bpf: Sync {tools,}/include/uapi/linux/bpf.h The tools version of this header is out of date; update it to the latest version from the kernel headers. Signed-off-by: Joe Stringer Acked-by: Wang Nan Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: http://lkml.kernel.org/r/20161209024620.31660-2-joe@ovn.org [ Sync it harder, after merging with what was in net-next via perf/urgent via torvalds/master to get BPG_PROG_(AT|DE)TACH, etc ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 593 +++++++++++++++++++++++++---------------- 1 file changed, 364 insertions(+), 229 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9e5fc168c8a3..0eb0e87dbe9f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -73,6 +73,8 @@ enum bpf_cmd { BPF_PROG_LOAD, BPF_OBJ_PIN, BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, }; enum bpf_map_type { @@ -85,6 +87,8 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, }; enum bpf_prog_type { @@ -95,8 +99,23 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, }; +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -105,6 +124,13 @@ enum bpf_prog_type { #define BPF_EXIST 2 /* update existing element */ #define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -140,243 +166,327 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + }; } __attribute__((aligned(8))); +/* BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(&map, &key) + * Return: Map value or NULL + * + * int bpf_map_update_elem(&map, &key, &value, flags) + * Return: 0 on success or negative error + * + * int bpf_map_delete_elem(&map, &key) + * Return: 0 on success or negative error + * + * int bpf_probe_read(void *dst, int size, void *src) + * Return: 0 on success or negative error + * + * u64 bpf_ktime_get_ns(void) + * Return: current ktime + * + * int bpf_trace_printk(const char *fmt, int fmt_size, ...) + * Return: length of buffer written or negative error + * + * u32 bpf_prandom_u32(void) + * Return: random value + * + * u32 bpf_raw_smp_processor_id(void) + * Return: SMP processor ID + * + * int bpf_skb_store_bytes(skb, offset, from, len, flags) + * store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l3_csum_replace(skb, offset, from, to, flags) + * recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l4_csum_replace(skb, offset, from, to, flags) + * recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_tail_call(ctx, prog_array_map, index) + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) + * redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success or negative error + * + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + * + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + * + * int bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success or negative error + * + * u32 bpf_get_cgroup_classid(skb) + * retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + * + * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) + * Return: 0 on success or negative error + * + * int bpf_skb_vlan_pop(skb) + * Return: 0 on success or negative error + * + * int bpf_skb_get_tunnel_key(skb, key, size, flags) + * int bpf_skb_set_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Return: 0 on success or negative error + * + * u64 bpf_perf_event_read(&map, index) + * Return: Number events read or error code + * + * int bpf_redirect(ifindex, flags) + * redirect to another netdev + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: TC_ACT_REDIRECT + * + * u32 bpf_get_route_realm(skb) + * retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + * + * int bpf_perf_event_output(ctx, map, index, data, size) + * output perf raw sample + * @ctx: struct pt_regs* + * @map: pointer to perf_event_array map + * @index: index of event in the map + * @data: data on stack to be output as raw data + * @size: size of data + * Return: 0 on success or negative error + * + * int bpf_get_stackid(ctx, map, flags) + * walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + * + * s64 bpf_csum_diff(from, from_size, to, to_size, seed) + * calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result or negative error code + * + * int bpf_skb_get_tunnel_opt(skb, opt, size) + * retrieve tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: option size + * + * int bpf_skb_set_tunnel_opt(skb, opt, size) + * populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success or negative error + * + * int bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is v4 -> v6, + * v6 -> v4 transitions. The helper will also resize the skb. eBPF + * program is expected to fill the new headers via skb_store_bytes + * and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + * + * int bpf_skb_under_cgroup(skb, map, index) + * Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + * + * u32 bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + * + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + * + * int bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + * + * int bpf_current_task_under_cgroup(map, index) + * Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + * + * int bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, to be used f.e. + * with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the skb is non-linear + * and not all of len are part of the linear section. Only needed for + * read/write with direct packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + * + * s64 bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + * + * void bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + * + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + * + * int bpf_skb_change_head() + * Grows headroom of skb and adjusts MAC header offset accordingly. + * Will extends/reallocae as required automatically. + * May change skb data pointer and will thus invalidate any check + * performed for direct packet access. + * @skb: pointer to skb + * @len: length of header to be pushed in front + * @flags: Flags (unused for now) + * Return: 0 on success or negative error + * + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), \ + FN(skb_change_head), \ + FN(xdp_adjust_head), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x enum bpf_func_id { - BPF_FUNC_unspec, - BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ - BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ - BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ - - /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_skb_store_bytes, - - /** - * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l3_csum_replace, - - /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l4_csum_replace, - - /** - * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run - * Return: 0 on success - */ - BPF_FUNC_tail_call, - - /** - * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_clone_redirect, - - /** - * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid - */ - BPF_FUNC_get_current_pid_tgid, - - /** - * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - */ - BPF_FUNC_get_current_uid_gid, - - /** - * bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success - */ - BPF_FUNC_get_current_comm, - - /** - * bpf_get_cgroup_classid(skb) - retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - */ - BPF_FUNC_get_cgroup_classid, - BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ - BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ - - /** - * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Retrun: 0 on success - */ - BPF_FUNC_skb_get_tunnel_key, - BPF_FUNC_skb_set_tunnel_key, - BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ - /** - * bpf_redirect(ifindex, flags) - redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - */ - BPF_FUNC_redirect, - - /** - * bpf_get_route_realm(skb) - retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - */ - BPF_FUNC_get_route_realm, - - /** - * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @index: index of event in the map - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success - */ - BPF_FUNC_perf_event_output, - BPF_FUNC_skb_load_bytes, - - /** - * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - */ - BPF_FUNC_get_stackid, - - /** - * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result - */ - BPF_FUNC_csum_diff, - - /** - * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) - * retrieve or populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success for set, option size for get - */ - BPF_FUNC_skb_get_tunnel_opt, - BPF_FUNC_skb_set_tunnel_opt, - - /** - * bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is - * v4 -> v6, v6 -> v4 transitions. The helper will also - * resize the skb. eBPF program is expected to fill the - * new headers via skb_store_bytes and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_proto, - - /** - * bpf_skb_change_type(skb, type) - * Change packet type of skb. - * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_type, - - /** - * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_skb_under_cgroup, - - /** - * bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash - */ - BPF_FUNC_get_hash_recalc, - - /** - * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - */ - BPF_FUNC_get_current_task, - - /** - * bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - */ - BPF_FUNC_probe_write_user, - + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; +#undef __BPF_ENUM_FN /* All flags used by eBPF helper functions, placed here. */ @@ -450,6 +560,31 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes */ +}; + +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; +}; + +#define XDP_PACKET_HEADROOM 256 + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result -- cgit v1.2.3-59-g8ed1b From 83d994d02bf4594d6bfa9b2b67befb6cff7f9380 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:15 -0800 Subject: tools lib bpf: use __u32 from linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following issue when building without access to 'u32' type: ./tools/lib/bpf/bpf.h:27:23: error: unknown type name ‘u32’ Signed-off-by: Joe Stringer Acked-by: Wang Nan Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: http://lkml.kernel.org/r/20161209024620.31660-3-joe@ovn.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/bpf.c | 4 ++-- tools/lib/bpf/bpf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 8143536b462a..89e8e8e5b60e 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -70,7 +70,7 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, size_t insns_cnt, char *license, - u32 kern_version, char *log_buf, size_t log_buf_sz) + __u32 kern_version, char *log_buf, size_t log_buf_sz) { int fd; union bpf_attr attr; @@ -98,7 +98,7 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, } int bpf_map_update_elem(int fd, void *key, void *value, - u64 flags) + __u64 flags) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 253c3dbb06b4..61130170a6ad 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -30,11 +30,11 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, #define BPF_LOG_BUF_SIZE 65536 int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, size_t insns_cnt, char *license, - u32 kern_version, char *log_buf, + __u32 kern_version, char *log_buf, size_t log_buf_sz); int bpf_map_update_elem(int fd, void *key, void *value, - u64 flags); + __u64 flags); int bpf_map_lookup_elem(int fd, void *key, void *value); int bpf_map_delete_elem(int fd, void *key); -- cgit v1.2.3-59-g8ed1b From a5580c7f7a6d078696458d283df5d6547ad1c740 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:16 -0800 Subject: tools lib bpf: Add flags to bpf_create_map() Commit 6c905981743 ("bpf: pre-allocate hash map elements") introduces map_flags to bpf_attr for BPF_MAP_CREATE command. Expose this new parameter in libbpf. By exposing it, users can access flags such as whether or not to preallocate the map. Signed-off-by: Joe Stringer Acked-by: Wang Nan Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: http://lkml.kernel.org/r/20161209024620.31660-4-joe@ovn.org [ Added clarifying comment made by Wang Nan ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/bpf.c | 3 ++- tools/lib/bpf/bpf.h | 2 +- tools/lib/bpf/libbpf.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 89e8e8e5b60e..d0afb26c2e0f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -54,7 +54,7 @@ static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, } int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries) + int value_size, int max_entries, __u32 map_flags) { union bpf_attr attr; @@ -64,6 +64,7 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, attr.key_size = key_size; attr.value_size = value_size; attr.max_entries = max_entries; + attr.map_flags = map_flags; return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 61130170a6ad..7fcdce16fd62 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,7 +24,7 @@ #include int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries); + int max_entries, __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e974593f3e8..84e6b35da4bd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -854,7 +854,8 @@ bpf_object__create_maps(struct bpf_object *obj) *pfd = bpf_create_map(def->type, def->key_size, def->value_size, - def->max_entries); + def->max_entries, + 0); if (*pfd < 0) { size_t j; int err = *pfd; -- cgit v1.2.3-59-g8ed1b From d40fc181ebec6b1d560e2167208276baa4f3bbf0 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 14 Dec 2016 14:43:38 -0800 Subject: samples/bpf: Make samples more libbpf-centric Switch all of the sample code to use the function names from tools/lib/bpf so that they're consistent with that, and to declare their own log buffers. This allow the next commit to be purely devoted to getting rid of the duplicate library in samples/bpf. Committer notes: Testing it: On a fedora rawhide container, with clang/llvm 3.9, sharing the host linux kernel git tree: # make O=/tmp/build/linux/ headers_install # make O=/tmp/build/linux -C samples/bpf/ Since I forgot to make it privileged, just tested it outside the container, using what it generated: # uname -a Linux jouet 4.9.0-rc8+ #1 SMP Mon Dec 12 11:20:49 BRT 2016 x86_64 x86_64 x86_64 GNU/Linux # cd /var/lib/docker/devicemapper/mnt/c43e09a53ff56c86a07baf79847f00e2cc2a17a1e2220e1adbf8cbc62734feda/rootfs/tmp/build/linux/samples/bpf/ # ls -la offwaketime -rwxr-xr-x. 1 root root 24200 Dec 15 12:19 offwaketime # file offwaketime offwaketime: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.32, BuildID[sha1]=c940d3f127d5e66cdd680e42d885cb0b64f8a0e4, not stripped # readelf -SW offwaketime_kern.o | grep PROGBITS [ 2] .text PROGBITS 0000000000000000 000040 000000 00 AX 0 0 4 [ 3] kprobe/try_to_wake_up PROGBITS 0000000000000000 000040 0000d8 00 AX 0 0 8 [ 5] tracepoint/sched/sched_switch PROGBITS 0000000000000000 000118 000318 00 AX 0 0 8 [ 7] maps PROGBITS 0000000000000000 000430 000050 00 WA 0 0 4 [ 8] license PROGBITS 0000000000000000 000480 000004 00 WA 0 0 1 [ 9] version PROGBITS 0000000000000000 000484 000004 00 WA 0 0 4 # ./offwaketime | head -5 swapper/1;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule;__schedule;-;---;; 106 CPU 0/KVM;entry_SYSCALL_64_fastpath;sys_ioctl;do_vfs_ioctl;kvm_vcpu_ioctl;kvm_arch_vcpu_ioctl_run;kvm_vcpu_block;schedule;__schedule;-;try_to_wake_up;swake_up_locked;swake_up;apic_timer_expired;apic_timer_fn;__hrtimer_run_queues;hrtimer_interrupt;local_apic_timer_interrupt;smp_apic_timer_interrupt;__irqentry_text_start;cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary;;swapper/3 2 Compositor;entry_SYSCALL_64_fastpath;sys_futex;do_futex;futex_wait;futex_wait_queue_me;schedule;__schedule;-;try_to_wake_up;futex_requeue;do_futex;sys_futex;entry_SYSCALL_64_fastpath;;SoftwareVsyncTh 5 firefox;entry_SYSCALL_64_fastpath;sys_poll;do_sys_poll;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule;__schedule;-;try_to_wake_up;pollwake;__wake_up_common;__wake_up_sync_key;pipe_write;__vfs_write;vfs_write;sys_write;entry_SYSCALL_64_fastpath;;Timer 13 JS Helper;entry_SYSCALL_64_fastpath;sys_futex;do_futex;futex_wait;futex_wait_queue_me;schedule;__schedule;-;try_to_wake_up;do_futex;sys_futex;entry_SYSCALL_64_fastpath;;firefox 2 # Signed-off-by: Joe Stringer Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Wang Nan Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20161214224342.12858-2-joe@ovn.org Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/bpf_load.c | 17 +++++++++--- samples/bpf/bpf_load.h | 3 +++ samples/bpf/fds_example.c | 9 ++++--- samples/bpf/lathist_user.c | 2 +- samples/bpf/libbpf.c | 23 ++++++++-------- samples/bpf/libbpf.h | 18 ++++++------- samples/bpf/lwt_len_hist_user.c | 6 +++-- samples/bpf/offwaketime_user.c | 8 +++--- samples/bpf/sampleip_user.c | 4 +-- samples/bpf/sock_example.c | 12 +++++---- samples/bpf/sockex1_user.c | 6 ++--- samples/bpf/sockex2_user.c | 4 +-- samples/bpf/sockex3_user.c | 4 +-- samples/bpf/spintest_user.c | 8 +++--- samples/bpf/tc_l2_redirect_user.c | 4 +-- samples/bpf/test_cgrp2_array_pin.c | 4 +-- samples/bpf/test_cgrp2_attach.c | 11 +++++--- samples/bpf/test_cgrp2_attach2.c | 7 +++-- samples/bpf/test_cgrp2_sock.c | 6 +++-- samples/bpf/test_current_task_under_cgroup_user.c | 8 +++--- samples/bpf/test_lru_dist.c | 32 +++++++++++------------ samples/bpf/test_probe_write_user_user.c | 2 +- samples/bpf/trace_event_user.c | 14 +++++----- samples/bpf/trace_output_user.c | 2 +- samples/bpf/tracex2_user.c | 10 +++---- samples/bpf/tracex3_user.c | 4 +-- samples/bpf/tracex4_user.c | 4 +-- samples/bpf/tracex6_user.c | 2 +- samples/bpf/xdp1_user.c | 2 +- samples/bpf/xdp_tx_iptunnel_user.c | 6 ++--- 30 files changed, 133 insertions(+), 109 deletions(-) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index e30b6de94f2e..f5b186c46b7c 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -22,7 +22,6 @@ #include #include #include "libbpf.h" -#include "bpf_helpers.h" #include "bpf_load.h" #define DEBUGFS "/sys/kernel/debug/tracing/" @@ -30,17 +29,26 @@ static char license[128]; static int kern_version; static bool processed_sec[128]; +char bpf_log_buf[BPF_LOG_BUF_SIZE]; int map_fd[MAX_MAPS]; int prog_fd[MAX_PROGS]; int event_fd[MAX_PROGS]; int prog_cnt; int prog_array_fd = -1; +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + static int populate_prog_array(const char *event, int prog_fd) { int ind = atoi(event), err; - err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); + err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); if (err < 0) { printf("failed to store prog_fd in prog_array\n"); return -1; @@ -87,9 +95,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } - fd = bpf_prog_load(prog_type, prog, size, license, kern_version); + fd = bpf_load_program(prog_type, prog, size, license, kern_version, + bpf_log_buf, BPF_LOG_BUF_SIZE); if (fd < 0) { - printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); return -1; } diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index fb46a421ab41..c827827299b3 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -1,12 +1,15 @@ #ifndef __BPF_LOAD_H #define __BPF_LOAD_H +#include "libbpf.h" + #define MAX_MAPS 32 #define MAX_PROGS 32 extern int map_fd[MAX_MAPS]; extern int prog_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS]; +extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; extern int prog_cnt; /* parses elf file compiled by llvm .c->.o diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 625e797be6ef..8a4fc4ef3993 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -58,8 +58,9 @@ static int bpf_prog_create(const char *object) assert(!load_bpf_file((char *)object)); return prog_fd[0]; } else { - return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, - insns, sizeof(insns), "GPL", 0); + return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, + insns, sizeof(insns), "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); } } @@ -83,12 +84,12 @@ static int bpf_do_map(const char *file, uint32_t flags, uint32_t key, } if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) { - ret = bpf_update_elem(fd, &key, &value, 0); + ret = bpf_map_update_elem(fd, &key, &value, 0); printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value, ret, strerror(errno)); assert(ret == 0); } else if (flags & BPF_F_KEY) { - ret = bpf_lookup_elem(fd, &key, &value); + ret = bpf_map_lookup_elem(fd, &key, &value); printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value, ret, strerror(errno)); assert(ret == 0); diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c index 65da8c1576de..6477bad5b4e2 100644 --- a/samples/bpf/lathist_user.c +++ b/samples/bpf/lathist_user.c @@ -73,7 +73,7 @@ static void get_data(int fd) for (c = 0; c < MAX_CPU; c++) { for (i = 0; i < MAX_ENTRIES; i++) { key = c * MAX_ENTRIES + i; - bpf_lookup_elem(fd, &key, &value); + bpf_map_lookup_elem(fd, &key, &value); cpu_hist[c].data[i] = value; if (value > cpu_hist[c].max) diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index 9ce707bf02a7..6f076abdca35 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -32,7 +32,7 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) +int bpf_map_update_elem(int fd, void *key, void *value, unsigned long long flags) { union bpf_attr attr = { .map_fd = fd, @@ -44,7 +44,7 @@ int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } -int bpf_lookup_elem(int fd, void *key, void *value) +int bpf_map_lookup_elem(int fd, void *key, void *value) { union bpf_attr attr = { .map_fd = fd, @@ -55,7 +55,7 @@ int bpf_lookup_elem(int fd, void *key, void *value) return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } -int bpf_delete_elem(int fd, void *key) +int bpf_map_delete_elem(int fd, void *key) { union bpf_attr attr = { .map_fd = fd, @@ -65,7 +65,7 @@ int bpf_delete_elem(int fd, void *key) return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); } -int bpf_get_next_key(int fd, void *key, void *next_key) +int bpf_map_get_next_key(int fd, void *key, void *next_key) { union bpf_attr attr = { .map_fd = fd, @@ -78,19 +78,18 @@ int bpf_get_next_key(int fd, void *key, void *next_key) #define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u)) -char bpf_log_buf[LOG_BUF_SIZE]; - -int bpf_prog_load(enum bpf_prog_type prog_type, - const struct bpf_insn *insns, int prog_len, - const char *license, int kern_version) +int bpf_load_program(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, int prog_len, + const char *license, int kern_version, + char *log_buf, size_t log_buf_sz) { union bpf_attr attr = { .prog_type = prog_type, .insns = ptr_to_u64((void *) insns), .insn_cnt = prog_len / sizeof(struct bpf_insn), .license = ptr_to_u64((void *) license), - .log_buf = ptr_to_u64(bpf_log_buf), - .log_size = LOG_BUF_SIZE, + .log_buf = ptr_to_u64(log_buf), + .log_size = log_buf_sz, .log_level = 1, }; @@ -99,7 +98,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, */ attr.kern_version = kern_version; - bpf_log_buf[0] = 0; + log_buf[0] = 0; return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); } diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 94a901d86fc2..20e3457857ca 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -6,14 +6,15 @@ struct bpf_insn; int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, int map_flags); -int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags); -int bpf_lookup_elem(int fd, void *key, void *value); -int bpf_delete_elem(int fd, void *key); -int bpf_get_next_key(int fd, void *key, void *next_key); +int bpf_map_update_elem(int fd, void *key, void *value, unsigned long long flags); +int bpf_map_lookup_elem(int fd, void *key, void *value); +int bpf_map_delete_elem(int fd, void *key); +int bpf_map_get_next_key(int fd, void *key, void *next_key); -int bpf_prog_load(enum bpf_prog_type prog_type, - const struct bpf_insn *insns, int insn_len, - const char *license, int kern_version); +int bpf_load_program(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, int insn_len, + const char *license, int kern_version, + char *log_buf, size_t log_buf_sz); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); @@ -21,8 +22,7 @@ int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); -#define LOG_BUF_SIZE (256 * 1024) -extern char bpf_log_buf[LOG_BUF_SIZE]; +#define BPF_LOG_BUF_SIZE (256 * 1024) /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c index 05d783fc5daf..ec8f3bbcbef3 100644 --- a/samples/bpf/lwt_len_hist_user.c +++ b/samples/bpf/lwt_len_hist_user.c @@ -14,6 +14,8 @@ #define MAX_INDEX 64 #define MAX_STARS 38 +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static void stars(char *str, long val, long max, int width) { int i; @@ -41,13 +43,13 @@ int main(int argc, char **argv) return -1; } - while (bpf_get_next_key(map_fd, &key, &next_key) == 0) { + while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) { if (next_key >= MAX_INDEX) { fprintf(stderr, "Key %lu out of bounds\n", next_key); continue; } - bpf_lookup_elem(map_fd, &next_key, values); + bpf_map_lookup_elem(map_fd, &next_key, values); sum = 0; for (i = 0; i < nr_cpus; i++) diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 6f002a9c24fa..9cce2a66bd66 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -49,14 +49,14 @@ static void print_stack(struct key_t *key, __u64 count) int i; printf("%s;", key->target); - if (bpf_lookup_elem(map_fd[3], &key->tret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_ksym(ip[i]); } printf("-;"); - if (bpf_lookup_elem(map_fd[3], &key->wret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) { printf("---;"); } else { for (i = 0; i < PERF_MAX_STACK_DEPTH; i++) @@ -77,8 +77,8 @@ static void print_stacks(int fd) struct key_t key = {}, next_key; __u64 value; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { - bpf_lookup_elem(fd, &next_key, &value); + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(fd, &next_key, &value); print_stack(&next_key, value); key = next_key; } diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 260a6bdd6413..5ac5adf75931 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -95,8 +95,8 @@ static void print_ip_map(int fd) /* fetch IPs and counts */ key = 0, i = 0; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { - bpf_lookup_elem(fd, &next_key, &value); + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(fd, &next_key, &value); counts[i].ip = next_key; counts[i++].count = value; key = next_key; diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 28b60baa9fa8..d6b91e9a38ad 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -28,6 +28,8 @@ #include #include "libbpf.h" +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static int test_sock(void) { int sock = -1, map_fd, prog_fd, i, key; @@ -55,8 +57,8 @@ static int test_sock(void) BPF_EXIT_INSN(), }; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), - "GPL", 0); + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), + "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); if (prog_fd < 0) { printf("failed to load prog '%s'\n", strerror(errno)); goto cleanup; @@ -72,13 +74,13 @@ static int test_sock(void) for (i = 0; i < 10; i++) { key = IPPROTO_TCP; - assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &tcp_cnt) == 0); key = IPPROTO_UDP; - assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &udp_cnt) == 0); key = IPPROTO_ICMP; - assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &icmp_cnt) == 0); printf("TCP %lld UDP %lld ICMP %lld packets\n", tcp_cnt, udp_cnt, icmp_cnt); diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 678ce4693551..9454448bf198 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -32,13 +32,13 @@ int main(int ac, char **argv) int key; key = IPPROTO_TCP; - assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); key = IPPROTO_UDP; - assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); key = IPPROTO_ICMP; - assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); printf("TCP %lld UDP %lld ICMP %lld bytes\n", tcp_cnt, udp_cnt, icmp_cnt); diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 8a4085c2d117..6a40600d5a83 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -39,8 +39,8 @@ int main(int ac, char **argv) int key = 0, next_key; struct pair value; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &value); printf("ip %s bytes %lld packets %lld\n", inet_ntoa((struct in_addr){htonl(next_key)}), value.bytes, value.packets); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 3fcfd8c4b2a3..9099c4255f23 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -54,8 +54,8 @@ int main(int argc, char **argv) sleep(1); printf("IP src.port -> dst.port bytes packets\n"); - while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[2], &next_key, &value); + while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[2], &next_key, &value); printf("%s.%05d -> %s.%05d %12lld %12lld\n", inet_ntoa((struct in_addr){htonl(next_key.src)}), next_key.port16[0], diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 311ede532230..80676c25fa50 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -31,8 +31,8 @@ int main(int ac, char **argv) for (i = 0; i < 5; i++) { key = 0; printf("kprobing funcs:"); - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &value); assert(next_key == value); sym = ksym_search(value); printf(" %s", sym->name); @@ -41,8 +41,8 @@ int main(int ac, char **argv) if (key) printf("\n"); key = 0; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) - bpf_delete_elem(map_fd[0], &next_key); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) + bpf_map_delete_elem(map_fd[0], &next_key); sleep(1); } diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c index 4013c5337b91..28995a776560 100644 --- a/samples/bpf/tc_l2_redirect_user.c +++ b/samples/bpf/tc_l2_redirect_user.c @@ -60,9 +60,9 @@ int main(int argc, char **argv) } /* bpf_tunnel_key.remote_ipv4 expects host byte orders */ - ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); + ret = bpf_map_update_elem(array_fd, &array_key, &ifindex, 0); if (ret) { - perror("bpf_update_elem"); + perror("bpf_map_update_elem"); goto out; } diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c index 70e86f7be69d..8a1b8b5d8def 100644 --- a/samples/bpf/test_cgrp2_array_pin.c +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -85,9 +85,9 @@ int main(int argc, char **argv) } } - ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); + ret = bpf_map_update_elem(array_fd, &array_key, &cg2_fd, 0); if (ret) { - perror("bpf_update_elem"); + perror("bpf_map_update_elem"); goto out; } diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index a19484c45b79..8283ef86d392 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -36,6 +36,8 @@ enum { MAP_KEY_BYTES, }; +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static int prog_load(int map_fd, int verdict) { struct bpf_insn prog[] = { @@ -67,8 +69,9 @@ static int prog_load(int map_fd, int verdict) BPF_EXIT_INSN(), }; - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, - prog, sizeof(prog), "GPL", 0); + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, sizeof(prog), "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); } static int usage(const char *argv0) @@ -108,10 +111,10 @@ static int attach_filter(int cg_fd, int type, int verdict) } while (1) { key = MAP_KEY_PACKETS; - assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &pkt_cnt) == 0); key = MAP_KEY_BYTES; - assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &byte_cnt) == 0); printf("cgroup received %lld packets, %lld bytes\n", pkt_cnt, byte_cnt); diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index ddfac42ed4df..fc6092fdc3b0 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -32,6 +32,8 @@ #define BAR "/foo/bar/" #define PING_CMD "ping -c1 -w1 127.0.0.1" +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static int prog_load(int verdict) { int ret; @@ -40,8 +42,9 @@ static int prog_load(int verdict) BPF_EXIT_INSN(), }; - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, - prog, sizeof(prog), "GPL", 0); + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, sizeof(prog), "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); if (ret < 0) { log_err("Loading program"); diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index d467b3c1c55c..43b4bde5d05c 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -23,6 +23,8 @@ #include "libbpf.h" +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static int prog_load(int idx) { struct bpf_insn prog[] = { @@ -34,8 +36,8 @@ static int prog_load(int idx) BPF_EXIT_INSN(), }; - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), - "GPL", 0); + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), + "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } static int usage(const char *argv0) diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c index 95aaaa846130..65b5fb51c1db 100644 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -36,7 +36,7 @@ int main(int argc, char **argv) if (!cg2) goto err; - if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { + if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { log_err("Adding target cgroup to map"); goto err; } @@ -50,7 +50,7 @@ int main(int argc, char **argv) */ sync(); - bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid); if (local_pid != remote_pid) { fprintf(stderr, @@ -64,10 +64,10 @@ int main(int argc, char **argv) goto err; remote_pid = 0; - bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); + bpf_map_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); sync(); - bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid); if (local_pid == remote_pid) { fprintf(stderr, "BPF cgroup negative test did not work\n"); diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index 316230a0ed23..d96dc88d3b04 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -134,7 +134,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, int seen = 0; lru->total++; - if (!bpf_lookup_elem(lru->map_fd, &key, &node)) { + if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) { if (node) { list_move(&node->list, &lru->list); return 1; @@ -151,7 +151,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, node = list_last_entry(&lru->list, struct pfect_lru_node, list); - bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); + bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); } node->key = key; @@ -159,10 +159,10 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, lru->nr_misses++; if (seen) { - assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); + assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); } else { lru->nr_unique++; - assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); + assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); } return seen; @@ -285,11 +285,11 @@ static void do_test_lru_dist(int task, void *data) pfect_lru_lookup_or_insert(&pfect_lru, key); - if (!bpf_lookup_elem(lru_map_fd, &key, &value)) + if (!bpf_map_lookup_elem(lru_map_fd, &key, &value)) continue; - if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { - printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n", + if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { + printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n", key, errno); assert(0); } @@ -358,19 +358,19 @@ static void test_lru_loss0(int map_type, int map_flags) for (key = 1; key <= 1000; key++) { int start_key, end_key; - assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); + assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); start_key = 101; end_key = min(key, 900); while (start_key <= end_key) { - bpf_lookup_elem(map_fd, &start_key, value); + bpf_map_lookup_elem(map_fd, &start_key, value); start_key++; } } for (key = 1; key <= 1000; key++) { - if (bpf_lookup_elem(map_fd, &key, value)) { + if (bpf_map_lookup_elem(map_fd, &key, value)) { if (key <= 100) old_unused_losses++; else if (key <= 900) @@ -408,10 +408,10 @@ static void test_lru_loss1(int map_type, int map_flags) value[0] = 1234; for (key = 1; key <= 1000; key++) - assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST)); for (key = 1; key <= 1000; key++) { - if (bpf_lookup_elem(map_fd, &key, value)) + if (bpf_map_lookup_elem(map_fd, &key, value)) nr_losses++; } @@ -436,7 +436,7 @@ static void do_test_parallel_lru_loss(int task, void *data) next_ins_key = stable_base; value[0] = 1234; for (i = 0; i < nr_stable_elems; i++) { - assert(bpf_update_elem(map_fd, &next_ins_key, value, + assert(bpf_map_update_elem(map_fd, &next_ins_key, value, BPF_NOEXIST) == 0); next_ins_key++; } @@ -448,9 +448,9 @@ static void do_test_parallel_lru_loss(int task, void *data) if (rn % 10) { key = rn % nr_stable_elems + stable_base; - bpf_lookup_elem(map_fd, &key, value); + bpf_map_lookup_elem(map_fd, &key, value); } else { - bpf_update_elem(map_fd, &next_ins_key, value, + bpf_map_update_elem(map_fd, &next_ins_key, value, BPF_NOEXIST); next_ins_key++; } @@ -458,7 +458,7 @@ static void do_test_parallel_lru_loss(int task, void *data) key = stable_base; for (i = 0; i < nr_stable_elems; i++) { - if (bpf_lookup_elem(map_fd, &key, value)) + if (bpf_map_lookup_elem(map_fd, &key, value)) nr_losses++; key++; } diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index a44bf347bedd..b5bf178a6ecc 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -50,7 +50,7 @@ int main(int ac, char **argv) mapped_addr_in->sin_port = htons(5555); mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); - assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); + assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); assert(listen(serverfd, 5) == 0); diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 9a130d31ecf2..704fe9fa77b2 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -61,14 +61,14 @@ static void print_stack(struct key_t *key, __u64 count) int i; printf("%3lld %s;", count, key->comm); - if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_ksym(ip[i]); } printf("-;"); - if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) @@ -98,10 +98,10 @@ static void print_stacks(void) int fd = map_fd[0], stack_map = map_fd[1]; sys_read_seen = sys_write_seen = false; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { - bpf_lookup_elem(fd, &next_key, &value); + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(fd, &next_key, &value); print_stack(&next_key, value); - bpf_delete_elem(fd, &next_key); + bpf_map_delete_elem(fd, &next_key); key = next_key; } @@ -111,8 +111,8 @@ static void print_stacks(void) } /* clear stack map */ - while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) { - bpf_delete_elem(stack_map, &next_id); + while (bpf_map_get_next_key(stack_map, &stackid, &next_id) == 0) { + bpf_map_delete_elem(stack_map, &next_id); stackid = next_id; } } diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 661a7d052f2c..3bedd945def1 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -162,7 +162,7 @@ static void test_bpf_perf_event(void) pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); assert(pmu_fd >= 0); - assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); + assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); } diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 3e225e331f66..ded9804c5034 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -48,12 +48,12 @@ static void print_hist_for_pid(int fd, void *task) long max_value = 0; int i, ind; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { if (memcmp(&next_key, task, SIZE)) { key = next_key; continue; } - bpf_lookup_elem(fd, &next_key, values); + bpf_map_lookup_elem(fd, &next_key, values); value = 0; for (i = 0; i < nr_cpus; i++) value += values[i]; @@ -83,7 +83,7 @@ static void print_hist(int fd) int task_cnt = 0; int i; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { int found = 0; for (i = 0; i < task_cnt; i++) @@ -136,8 +136,8 @@ int main(int ac, char **argv) for (i = 0; i < 5; i++) { key = 0; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &value); printf("location 0x%lx count %ld\n", next_key, value); key = next_key; } diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index d0851cb4fa8d..8f7d199d5945 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -28,7 +28,7 @@ static void clear_stats(int fd) memset(values, 0, sizeof(values)); for (key = 0; key < SLOTS; key++) - bpf_update_elem(fd, &key, values, BPF_ANY); + bpf_map_update_elem(fd, &key, values, BPF_ANY); } const char *color[] = { @@ -89,7 +89,7 @@ static void print_hist(int fd) int i; for (key = 0; key < SLOTS; key++) { - bpf_lookup_elem(fd, &key, values); + bpf_map_lookup_elem(fd, &key, values); value = 0; for (i = 0; i < nr_cpus; i++) value += values[i]; diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index bc4a3bdea6ed..03449f773cb1 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -37,8 +37,8 @@ static void print_old_objects(int fd) key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */ key = -1; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &v); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &v); key = next_key; if (val - v.val < 1000000000ll) /* object was allocated more then 1 sec ago */ diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 8ea4976cfcf1..179297cb4d35 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -36,7 +36,7 @@ static void test_bpf_perf_event(void) goto exit; } - bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); + bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); } diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 5f040a0d7712..d2be65d1fd86 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -43,7 +43,7 @@ static void poll_stats(int interval) for (key = 0; key < nr_keys; key++) { __u64 sum = 0; - assert(bpf_lookup_elem(map_fd[0], &key, values) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[key][i]); if (sum) diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 7a71f5c74684..70e192fc61aa 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -51,7 +51,7 @@ static void poll_stats(unsigned int kill_after_s) for (proto = 0; proto < nr_protos; proto++) { __u64 sum = 0; - assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[proto][i]); @@ -237,8 +237,8 @@ int main(int argc, char **argv) while (min_port <= max_port) { vip.dport = htons(min_port++); - if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { - perror("bpf_update_elem(&vip2tnl)"); + if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { + perror("bpf_map_update_elem(&vip2tnl)"); return 1; } } -- cgit v1.2.3-59-g8ed1b From 297e765e390a2ac996000b5f7228cbd84d995174 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Tue, 13 Dec 2016 11:40:57 +0100 Subject: uprobes: Fix uprobes on MIPS, allow for a cache flush after ixol breakpoint creation Commit: 72e6ae285a1d ('ARM: 8043/1: uprobes need icache flush after xol write' ... has introduced an arch-specific method to ensure all caches are flushed appropriately after an instruction is written to an XOL page. However, when the XOL area is created and the out-of-line breakpoint instruction is copied, caches are not flushed at all and stale data may be found in icache. Replace a simple copy_to_page() with arch_uprobe_copy_ixol() to allow the arch to ensure all caches are updated accordingly. This change fixes uprobes on MIPS InterAptiv (tested on Creator Ci40). Signed-off-by: Marcin Nowakowski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Victor Kamensky Cc: linux-mips@linux-mips.org Link: http://lkml.kernel.org/r/1481625657-22850-1-git-send-email-marcin.nowakowski@imgtec.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f9ec9add2164..b5916b4969a5 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1194,7 +1194,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); atomic_set(&area->slot_count, 1); - copy_to_page(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); + arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); if (!xol_add_vma(mm, area)) return area; -- cgit v1.2.3-59-g8ed1b From 96c2fb69b92fcf6006dfb3017d6d887f8321407b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 15 Dec 2016 12:29:27 -0300 Subject: samples/bpf: Make perf_event_read() static While testing Joe's conversion of samples/bpf/ to use tools/lib/bpf/ I noticed some warnings building samples/bpf/ on a Fedora Rawhide container, with clang/llvm 3.9 I noticed this: [root@1e797fdfbf4f linux]# make -j4 O=/tmp/build/linux/ samples/bpf/ make[1]: Entering directory '/tmp/build/linux' CHK include/config/kernel.release GEN ./Makefile CHK include/generated/uapi/linux/version.h Using /git/linux as source for kernel HOSTCC samples/bpf/trace_output_user.o /git/linux/samples/bpf/trace_output_user.c:64:6: warning: no previous prototype for 'perf_event_read' [-Wmissing-prototypes] void perf_event_read(print_fn fn) ^~~~~~~~~~~~~~~ HOSTLD samples/bpf/trace_output make[1]: Leaving directory '/tmp/build/linux' Shut up the compiler by making that function static. Acked-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Joe Stringer Cc: Wang Nan Link: http://lkml.kernel.org/r/20161215152927.GC6866@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/trace_output_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 3bedd945def1..1a1da7bddb93 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -61,7 +61,7 @@ struct perf_event_sample { char data[]; }; -void perf_event_read(print_fn fn) +static void perf_event_read(print_fn fn) { __u64 data_tail = header->data_tail; __u64 data_head = header->data_head; -- cgit v1.2.3-59-g8ed1b From 2bd42f3aaa53ebe78b9be6f898b7945dd61f9773 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2016 20:56:54 +0100 Subject: perf trace: Check if MAP_32BIT is defined (again) There might be systems where MAP_32BIT is not defined, like some some RHEL7 powerpc versions. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Kyle McMartin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 256763b01741 ("perf trace beauty mmap: Add more conditional defines") Link: http://lkml.kernel.org/r/1481831814-23683-1-git-send-email-jolsa@kernel.org [ Changed the Fixme cset to the one removing the conditional switch case for MAP_32BIT ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index fd710ab33684..af1cfde6b97b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); -- cgit v1.2.3-59-g8ed1b From 9de3ffa1b714e6b8ebc1723f71bc9172a4470f7d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 Dec 2016 11:36:24 -0800 Subject: perf bench futex: Fix lock-pi help string Obvious copy/paste typo from the requeue program. Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/1481830584-30909-1-git-send-email-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-lock-pi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 465012b320ee..6d9d6c40a916 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -48,7 +48,7 @@ static const struct option options[] = { }; static const char * const bench_futex_lock_pi_usage[] = { - "perf bench futex requeue ", + "perf bench futex lock-pi ", NULL }; -- cgit v1.2.3-59-g8ed1b From edee44be59190bf22d5c6e521f3852b7ff16862f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 22 Nov 2016 14:10:50 +0530 Subject: perf annotate: Don't throw error for zero length symbols 'perf report --tui' exits with error when it finds a sample of zero length symbol (i.e. addr == sym->start == sym->end). Actually these are valid samples. Don't exit TUI and show report with such symbols. Reported-and-Tested-by: Anton Blanchard Link: https://lkml.org/lkml/2016/10/8/189 Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Benjamin Herrenschmidt Cc: Chris Riyder Cc: linuxppc-dev@lists.ozlabs.org Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Peter Zijlstra Cc: stable@kernel.org # v4.9+ Link: http://lkml.kernel.org/r/1479804050-5028-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c81a3950a7fe..06cc04e5806a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -647,7 +647,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (addr < sym->start || addr >= sym->end) { + if ((addr < sym->start || addr >= sym->end) && + (addr != sym->end || sym->start != sym->end)) { pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end); return -ERANGE; -- cgit v1.2.3-59-g8ed1b From ed6c166cc7dc329736cace3affd2df984fb22ec8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 13 Dec 2016 10:29:44 -0500 Subject: perf diff: Do not overwrite valid build id Fixes a perf diff regression issue which was introduced by commit 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") The binary name could be same when perf diff different binaries. Build id is used to distinguish between them. However, the previous patch assumes the same binary name has same build id. So it overwrites the build id according to the binary name, regardless of whether the build id is set or not. Check the has_build_id in dso__load. If the build id is already set, use it. Before the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% -99.80% tchain_edit [.] f2 0.12% +99.81% tchain_edit [.] f3 0.02% -0.01% [ixgbe] [k] ixgbe_read_reg After the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% +0.10% tchain_edit [.] f3 0.12% -0.08% tchain_edit [.] f2 Signed-off-by: Kan Liang Cc: Andi Kleen CC: Dima Kogan Cc: Jiri Olsa Cc: Namhyung Kim Fixes: 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") Link: http://lkml.kernel.org/r/1481642984-13593-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index df2482b2ba45..dc93940de351 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (is_regular_file(dso->long_name) && + if (!dso->has_build_id && + is_regular_file(dso->long_name) && filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); -- cgit v1.2.3-59-g8ed1b From 43371c83f382bd495a2294e91a32f30763cfdbef Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 14 Dec 2016 14:43:39 -0800 Subject: samples/bpf: Switch over to libbpf Now that libbpf under tools/lib/bpf/* is synced with the version from samples/bpf, we can get rid most of the libbpf library here. Committer notes: Built it in a docker fedora rawhide container and ran it in the f25 host, seems to work just like it did before this patch, i.e. the switch to tools/lib/bpf/ doesn't seem to have introduced problems and Joe said he tested it with all the entries in samples/bpf/ and other code he found: [root@f5065a7d6272 linux]# make -j4 O=/tmp/build/linux headers_install [root@f5065a7d6272 linux]# rm -rf /tmp/build/linux/samples/bpf/ [root@f5065a7d6272 linux]# make -j4 O=/tmp/build/linux samples/bpf/ make[1]: Entering directory '/tmp/build/linux' CHK include/config/kernel.release HOSTCC scripts/basic/fixdep GEN ./Makefile CHK include/generated/uapi/linux/version.h Using /git/linux as source for kernel CHK include/generated/utsrelease.h HOSTCC scripts/basic/bin2c HOSTCC arch/x86/tools/relocs_32.o HOSTCC arch/x86/tools/relocs_64.o LD samples/bpf/built-in.o HOSTCC samples/bpf/fds_example.o HOSTCC samples/bpf/sockex1_user.o /git/linux/samples/bpf/fds_example.c: In function 'bpf_prog_create': /git/linux/samples/bpf/fds_example.c:63:6: warning: passing argument 2 of 'bpf_load_program' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] insns, insns_cnt, "GPL", 0, ^~~~~ In file included from /git/linux/samples/bpf/libbpf.h:5:0, from /git/linux/samples/bpf/bpf_load.h:4, from /git/linux/samples/bpf/fds_example.c:15: /git/linux/tools/lib/bpf/bpf.h:31:5: note: expected 'struct bpf_insn *' but argument is of type 'const struct bpf_insn *' int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, ^~~~~~~~~~~~~~~~ HOSTCC samples/bpf/sockex2_user.o HOSTCC samples/bpf/xdp_tx_iptunnel_user.o clang -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/6.2.1/include -I/git/linux/arch/x86/include -I./arch/x86/include/generated/uapi -I./arch/x86/include/generated -I/git/linux/include -I./include -I/git/linux/arch/x86/include/uapi -I/git/linux/include/uapi -I./include/generated/uapi -include /git/linux/include/linux/kconfig.h \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -O2 -emit-llvm -c /git/linux/samples/bpf/sockex1_kern.c -o -| llc -march=bpf -filetype=obj -o samples/bpf/sockex1_kern.o HOSTLD samples/bpf/tc_l2_redirect HOSTLD samples/bpf/lwt_len_hist HOSTLD samples/bpf/xdp_tx_iptunnel make[1]: Leaving directory '/tmp/build/linux' [root@f5065a7d6272 linux]# And then, in the host: [root@jouet bpf]# mount | grep "docker.*devicemapper\/" /dev/mapper/docker-253:0-1705076-9bd8aa1e0af33adce89ff42090847868ca676932878942be53941a06ec5923f9 on /var/lib/docker/devicemapper/mnt/9bd8aa1e0af33adce89ff42090847868ca676932878942be53941a06ec5923f9 type xfs (rw,relatime,context="system_u:object_r:container_file_t:s0:c73,c276",nouuid,attr2,inode64,sunit=1024,swidth=1024,noquota) [root@jouet bpf]# cd /var/lib/docker/devicemapper/mnt/9bd8aa1e0af33adce89ff42090847868ca676932878942be53941a06ec5923f9/rootfs/tmp/build/linux/samples/bpf/ [root@jouet bpf]# file offwaketime offwaketime: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.32, BuildID[sha1]=f423d171e0487b2f802b6a792657f0f3c8f6d155, not stripped [root@jouet bpf]# readelf -SW offwaketime offwaketime offwaketime_kern.o offwaketime_user.o [root@jouet bpf]# readelf -SW offwaketime_kern.o There are 11 section headers, starting at offset 0x700: Section Headers: [Nr] Name Type Address Off Size ES Flg Lk Inf Al [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 [ 1] .strtab STRTAB 0000000000000000 000658 0000a8 00 0 0 1 [ 2] .text PROGBITS 0000000000000000 000040 000000 00 AX 0 0 4 [ 3] kprobe/try_to_wake_up PROGBITS 0000000000000000 000040 0000d8 00 AX 0 0 8 [ 4] .relkprobe/try_to_wake_up REL 0000000000000000 0005a8 000020 10 10 3 8 [ 5] tracepoint/sched/sched_switch PROGBITS 0000000000000000 000118 000318 00 AX 0 0 8 [ 6] .reltracepoint/sched/sched_switch REL 0000000000000000 0005c8 000090 10 10 5 8 [ 7] maps PROGBITS 0000000000000000 000430 000050 00 WA 0 0 4 [ 8] license PROGBITS 0000000000000000 000480 000004 00 WA 0 0 1 [ 9] version PROGBITS 0000000000000000 000484 000004 00 WA 0 0 4 [10] .symtab SYMTAB 0000000000000000 000488 000120 18 1 4 8 Key to Flags: W (write), A (alloc), X (execute), M (merge), S (strings) I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown) O (extra OS processing required) o (OS specific), p (processor specific) [root@jouet bpf]# ./offwaketime | head -3 qemu-system-x86;entry_SYSCALL_64_fastpath;sys_ppoll;do_sys_poll;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule;__schedule;-;try_to_wake_up;hrtimer_wakeup;__hrtimer_run_queues;hrtimer_interrupt;local_apic_timer_interrupt;smp_apic_timer_interrupt;__irqentry_text_start;cpuidle_enter_state;cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel;start_cpu;;swapper/0 4 firefox;entry_SYSCALL_64_fastpath;sys_poll;do_sys_poll;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule;__schedule;-;try_to_wake_up;pollwake;__wake_up_common;__wake_up_sync_key;pipe_write;__vfs_write;vfs_write;sys_write;entry_SYSCALL_64_fastpath;;Timer 1 swapper/2;start_cpu;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule;__schedule;-;---;; 61 [root@jouet bpf]# Signed-off-by: Joe Stringer Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Wang Nan Cc: netdev@vger.kernel.org Link: https://github.com/joestringer/linux/commit/5c40f54a52b1f437123c81e21873f4b4b1f9bd55.patch Link: http://lkml.kernel.org/n/tip-xr8twtx7sjh5821g8qw47yxk@git.kernel.org [ Use -I$(srctree)/tools/lib/ to support out of source code tree builds, as noticed by Wang Nan ] Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/Makefile | 68 +++++++++++++----------- samples/bpf/README.rst | 4 +- samples/bpf/bpf_load.c | 3 +- samples/bpf/fds_example.c | 3 +- samples/bpf/libbpf.c | 111 --------------------------------------- samples/bpf/libbpf.h | 19 +------ samples/bpf/sock_example.c | 3 +- samples/bpf/test_cgrp2_attach.c | 3 +- samples/bpf/test_cgrp2_attach2.c | 3 +- samples/bpf/test_cgrp2_sock.c | 3 +- 10 files changed, 52 insertions(+), 168 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index f2219c1489e5..81b0ef2f7994 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -35,40 +35,43 @@ hostprogs-y += tc_l2_redirect hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel -test_lru_dist-objs := test_lru_dist.o libbpf.o -sock_example-objs := sock_example.o libbpf.o -fds_example-objs := bpf_load.o libbpf.o fds_example.o -sockex1-objs := bpf_load.o libbpf.o sockex1_user.o -sockex2-objs := bpf_load.o libbpf.o sockex2_user.o -sockex3-objs := bpf_load.o libbpf.o sockex3_user.o -tracex1-objs := bpf_load.o libbpf.o tracex1_user.o -tracex2-objs := bpf_load.o libbpf.o tracex2_user.o -tracex3-objs := bpf_load.o libbpf.o tracex3_user.o -tracex4-objs := bpf_load.o libbpf.o tracex4_user.o -tracex5-objs := bpf_load.o libbpf.o tracex5_user.o -tracex6-objs := bpf_load.o libbpf.o tracex6_user.o -test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o -trace_output-objs := bpf_load.o libbpf.o trace_output_user.o -lathist-objs := bpf_load.o libbpf.o lathist_user.o -offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o -spintest-objs := bpf_load.o libbpf.o spintest_user.o -map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o -test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o -test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o -test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o -test_cgrp2_attach2-objs := libbpf.o test_cgrp2_attach2.o cgroup_helpers.o -test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o -test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o -xdp1-objs := bpf_load.o libbpf.o xdp1_user.o +# Libbpf dependencies +LIBBPF := libbpf.o ../../tools/lib/bpf/bpf.o + +test_lru_dist-objs := test_lru_dist.o $(LIBBPF) +sock_example-objs := sock_example.o $(LIBBPF) +fds_example-objs := bpf_load.o $(LIBBPF) fds_example.o +sockex1-objs := bpf_load.o $(LIBBPF) sockex1_user.o +sockex2-objs := bpf_load.o $(LIBBPF) sockex2_user.o +sockex3-objs := bpf_load.o $(LIBBPF) sockex3_user.o +tracex1-objs := bpf_load.o $(LIBBPF) tracex1_user.o +tracex2-objs := bpf_load.o $(LIBBPF) tracex2_user.o +tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o +tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o +tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o +tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o +trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o +lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o +offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o +spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o +map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o +test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o +test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o +test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o +test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o +test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o +test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o +xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally -xdp2-objs := bpf_load.o libbpf.o xdp1_user.o -test_current_task_under_cgroup-objs := bpf_load.o libbpf.o cgroup_helpers.o \ +xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o +test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \ test_current_task_under_cgroup_user.o -trace_event-objs := bpf_load.o libbpf.o trace_event_user.o -sampleip-objs := bpf_load.o libbpf.o sampleip_user.o -tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o -lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o -xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o +trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o +sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o +tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o +lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o +xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -104,6 +107,7 @@ always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o HOSTCFLAGS += -I$(objtree)/usr/include +HOSTCFLAGS += -I$(srctree)/tools/lib/ HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index a43eae3f0551..79f9a58f1872 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -1,8 +1,8 @@ eBPF sample programs ==================== -This directory contains a mini eBPF library, test stubs, verifier -test-suite and examples for using eBPF. +This directory contains a test stubs, verifier test-suite and examples +for using eBPF. The examples use libbpf from tools/lib/bpf. Build dependencies ================== diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index f5b186c46b7c..1bfb43394013 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -66,6 +66,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; + size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -95,7 +96,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } - fd = bpf_load_program(prog_type, prog, size, license, kern_version, + fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version, bpf_log_buf, BPF_LOG_BUF_SIZE); if (fd < 0) { printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 8a4fc4ef3993..6245062844d1 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -53,13 +53,14 @@ static int bpf_prog_create(const char *object) BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); if (object) { assert(!load_bpf_file((char *)object)); return prog_fd[0]; } else { return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, - insns, sizeof(insns), "GPL", 0, + insns, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } } diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index 6f076abdca35..3391225ad7e9 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -4,8 +4,6 @@ #include #include #include -#include -#include #include #include #include @@ -13,96 +11,6 @@ #include #include "libbpf.h" -static __u64 ptr_to_u64(void *ptr) -{ - return (__u64) (unsigned long) ptr; -} - -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries, int map_flags) -{ - union bpf_attr attr = { - .map_type = map_type, - .key_size = key_size, - .value_size = value_size, - .max_entries = max_entries, - .map_flags = map_flags, - }; - - return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); -} - -int bpf_map_update_elem(int fd, void *key, void *value, unsigned long long flags) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - .value = ptr_to_u64(value), - .flags = flags, - }; - - return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_lookup_elem(int fd, void *key, void *value) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - .value = ptr_to_u64(value), - }; - - return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_delete_elem(int fd, void *key) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - }; - - return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_get_next_key(int fd, void *key, void *next_key) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - .next_key = ptr_to_u64(next_key), - }; - - return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); -} - -#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u)) - -int bpf_load_program(enum bpf_prog_type prog_type, - const struct bpf_insn *insns, int prog_len, - const char *license, int kern_version, - char *log_buf, size_t log_buf_sz) -{ - union bpf_attr attr = { - .prog_type = prog_type, - .insns = ptr_to_u64((void *) insns), - .insn_cnt = prog_len / sizeof(struct bpf_insn), - .license = ptr_to_u64((void *) license), - .log_buf = ptr_to_u64(log_buf), - .log_size = log_buf_sz, - .log_level = 1, - }; - - /* assign one field outside of struct init to make sure any - * padding is zero initialized - */ - attr.kern_version = kern_version; - - log_buf[0] = 0; - - return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); -} - int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) { union bpf_attr attr = { @@ -124,25 +32,6 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr)); } -int bpf_obj_pin(int fd, const char *pathname) -{ - union bpf_attr attr = { - .pathname = ptr_to_u64((void *)pathname), - .bpf_fd = fd, - }; - - return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr)); -} - -int bpf_obj_get(const char *pathname) -{ - union bpf_attr attr = { - .pathname = ptr_to_u64((void *)pathname), - }; - - return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr)); -} - int open_raw_sock(const char *name) { struct sockaddr_ll sll; diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 20e3457857ca..cf7d2386d1f9 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -2,28 +2,13 @@ #ifndef __LIBBPF_H #define __LIBBPF_H -struct bpf_insn; - -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries, int map_flags); -int bpf_map_update_elem(int fd, void *key, void *value, unsigned long long flags); -int bpf_map_lookup_elem(int fd, void *key, void *value); -int bpf_map_delete_elem(int fd, void *key); -int bpf_map_get_next_key(int fd, void *key, void *next_key); +#include -int bpf_load_program(enum bpf_prog_type prog_type, - const struct bpf_insn *insns, int insn_len, - const char *license, int kern_version, - char *log_buf, size_t log_buf_sz); +struct bpf_insn; int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); -int bpf_obj_pin(int fd, const char *pathname); -int bpf_obj_get(const char *pathname); - -#define BPF_LOG_BUF_SIZE (256 * 1024) - /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ #define BPF_ALU64_REG(OP, DST, SRC) \ diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index d6b91e9a38ad..5546f8aac37e 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -56,8 +56,9 @@ static int test_sock(void) BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); if (prog_fd < 0) { printf("failed to load prog '%s'\n", strerror(errno)); diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 8283ef86d392..504058631ffc 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -68,9 +68,10 @@ static int prog_load(int map_fd, int verdict) BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, - prog, sizeof(prog), "GPL", 0, + prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index fc6092fdc3b0..6e69be37f87f 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -41,9 +41,10 @@ static int prog_load(int verdict) BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, - prog, sizeof(prog), "GPL", 0, + prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); if (ret < 0) { diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index 43b4bde5d05c..0791b949cbe4 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -35,8 +35,9 @@ static int prog_load(int idx) BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } -- cgit v1.2.3-59-g8ed1b From 5dc880de6e7c8566fbc8bc5dfc3a922d2d1c5ee3 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 14 Dec 2016 14:05:26 -0800 Subject: tools lib bpf: Add bpf_prog_{attach,detach} Commit d8c5b17f2bc0 ("samples: bpf: add userspace example for attaching eBPF programs to cgroups") added these functions to samples/libbpf, but during this merge all of the samples libbpf functionality is shifting to tools/lib/bpf. Shift these functions there. Committer notes: Use bzero + attr.FIELD = value instead of 'attr = { .FIELD = value, just like the other wrapper calls to sys_bpf with bpf_attr to make this build in older toolchais, such as the ones in CentOS 5 and 6. Signed-off-by: Joe Stringer Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-au2zvtsh55vqeo3v3uw7jr4c@git.kernel.org Link: https://github.com/joestringer/linux/commit/353e6f298c3d0a92fa8bfa61ff898c5050261a12.patch Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/libbpf.c | 21 --------------------- samples/bpf/libbpf.h | 3 --- tools/lib/bpf/bpf.c | 23 +++++++++++++++++++++++ tools/lib/bpf/bpf.h | 3 +++ 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index 3391225ad7e9..d9af876b4a2c 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -11,27 +11,6 @@ #include #include "libbpf.h" -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) -{ - union bpf_attr attr = { - .target_fd = target_fd, - .attach_bpf_fd = prog_fd, - .attach_type = type, - }; - - return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr)); -} - -int bpf_prog_detach(int target_fd, enum bpf_attach_type type) -{ - union bpf_attr attr = { - .target_fd = target_fd, - .attach_type = type, - }; - - return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr)); -} - int open_raw_sock(const char *name) { struct sockaddr_ll sll; diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index cf7d2386d1f9..cc815624aacf 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -6,9 +6,6 @@ struct bpf_insn; -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); -int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); - /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ #define BPF_ALU64_REG(OP, DST, SRC) \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d0afb26c2e0f..3ddb58a36d3c 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -167,3 +167,26 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } + +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 7fcdce16fd62..a2f9853dd882 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -41,5 +41,8 @@ int bpf_map_delete_elem(int fd, void *key); int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); + #endif -- cgit v1.2.3-59-g8ed1b From 811b4f0d785d33eabfff5d0ea60596b6b8bdc825 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Dec 2016 11:03:13 -0300 Subject: samples/bpf: Be consistent with bpf_load_program bpf_insn parameter Only one of the examples declare the bpf_insn bpf proggie as a const: $ grep 'struct bpf_insn [a-z]' samples/bpf/*.c samples/bpf/fds_example.c: static const struct bpf_insn insns[] = { samples/bpf/sock_example.c: struct bpf_insn prog[] = { samples/bpf/test_cgrp2_attach2.c: struct bpf_insn prog[] = { samples/bpf/test_cgrp2_attach.c: struct bpf_insn prog[] = { samples/bpf/test_cgrp2_sock.c: struct bpf_insn prog[] = { $ Which causes this warning: [root@f5065a7d6272 linux]# make -j4 O=/tmp/build/linux samples/bpf/ HOSTCC samples/bpf/fds_example.o /git/linux/samples/bpf/fds_example.c: In function 'bpf_prog_create': /git/linux/samples/bpf/fds_example.c:63:6: warning: passing argument 2 of 'bpf_load_program' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] insns, insns_cnt, "GPL", 0, ^~~~~ In file included from /git/linux/samples/bpf/libbpf.h:5:0, from /git/linux/samples/bpf/bpf_load.h:4, from /git/linux/samples/bpf/fds_example.c:15: /git/linux/tools/lib/bpf/bpf.h:31:5: note: expected 'struct bpf_insn *' but argument is of type 'const struct bpf_insn *' int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, ^~~~~~~~~~~~~~~~ HOSTCC samples/bpf/sockex1_user.o So just ditch that 'const' to reduce build noise, leaving changing the bpf_load_program() bpf_insn parameter to const to a later patch, if deemed adequate. Cc: Joe Stringer Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-1z5xee8n3oa66jf62bpv16ed@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/fds_example.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 6245062844d1..a5cddc99cccd 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -49,7 +49,7 @@ static int bpf_map_create(void) static int bpf_prog_create(const char *object) { - static const struct bpf_insn insns[] = { + static struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }; -- cgit v1.2.3-59-g8ed1b From 205c8ada314f78e6637342089e5b585a051d6cf5 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:19 -0800 Subject: samples/bpf: Remove perf_event_open() declaration This declaration was made in samples/bpf/libbpf.c for convenience, but there's already one in tools/perf/perf-sys.h. Reuse that one. Committer notes: Testing it: $ make -j4 O=../build/v4.9.0-rc8+ samples/bpf/ make[1]: Entering directory '/home/build/v4.9.0-rc8+' CHK include/config/kernel.release GEN ./Makefile CHK include/generated/uapi/linux/version.h Using /home/acme/git/linux as source for kernel CHK include/generated/utsrelease.h CHK include/generated/timeconst.h CHK include/generated/bounds.h CHK include/generated/asm-offsets.h CALL /home/acme/git/linux/scripts/checksyscalls.sh HOSTCC samples/bpf/test_verifier.o HOSTCC samples/bpf/libbpf.o HOSTCC samples/bpf/../../tools/lib/bpf/bpf.o HOSTCC samples/bpf/test_maps.o HOSTCC samples/bpf/sock_example.o HOSTCC samples/bpf/bpf_load.o HOSTLD samples/bpf/trace_event HOSTLD samples/bpf/sampleip HOSTLD samples/bpf/tc_l2_redirect make[1]: Leaving directory '/home/build/v4.9.0-rc8+' $ Also tested the offwaketime resulting from the rebuild, seems to work as before. Signed-off-by: Joe Stringer Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Wang Nan Link: http://lkml.kernel.org/r/20161209024620.31660-7-joe@ovn.org [ Use -I$(srctree)/tools/lib/ to support out of source code tree builds ] Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/Makefile | 2 ++ samples/bpf/bpf_load.c | 3 ++- samples/bpf/libbpf.c | 7 ------- samples/bpf/libbpf.h | 3 --- samples/bpf/sampleip_user.c | 3 ++- samples/bpf/trace_event_user.c | 9 +++++---- samples/bpf/trace_output_user.c | 3 ++- samples/bpf/tracex6_user.c | 3 ++- 8 files changed, 15 insertions(+), 18 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 81b0ef2f7994..5a73f5a7ace1 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -109,6 +109,8 @@ always += xdp_tx_iptunnel_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ +HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include +HOSTCFLAGS += -I$(srctree)/tools/perf HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTLOADLIBES_fds_example += -lelf diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 1bfb43394013..396e204888b3 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -23,6 +23,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" #define DEBUGFS "/sys/kernel/debug/tracing/" @@ -179,7 +180,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) id = atoi(buf); attr.config = id; - efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); if (efd < 0) { printf("event %d fd %d err %s\n", id, efd, strerror(errno)); return -1; diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index d9af876b4a2c..bee473a494f1 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -34,10 +34,3 @@ int open_raw_sock(const char *name) return sock; } - -int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, - int group_fd, unsigned long flags) -{ - return syscall(__NR_perf_event_open, attr, pid, cpu, - group_fd, flags); -} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index cc815624aacf..09aedc320009 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -188,7 +188,4 @@ struct bpf_insn; /* create RAW socket and bind to interface 'name' */ int open_raw_sock(const char *name); -struct perf_event_attr; -int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, - int group_fd, unsigned long flags); #endif diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 5ac5adf75931..be59d7dcbdde 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -21,6 +21,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" #define DEFAULT_FREQ 99 #define DEFAULT_SECS 5 @@ -49,7 +50,7 @@ static int sampling_start(int *pmu_fd, int freq) }; for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i, + pmu_fd[i] = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i, -1 /* group_fd */, 0 /* flags */); if (pmu_fd[i] < 0) { fprintf(stderr, "ERROR: Initializing perf sampling\n"); diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 704fe9fa77b2..0c5561d193a4 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -20,6 +20,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" #define SAMPLE_FREQ 50 @@ -125,9 +126,9 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) /* open perf_event on all cpus */ for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0); + pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); if (pmu_fd[i] < 0) { - printf("perf_event_open failed\n"); + printf("sys_perf_event_open failed\n"); goto all_cpu_err; } assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); @@ -146,9 +147,9 @@ static void test_perf_event_task(struct perf_event_attr *attr) int pmu_fd; /* open task bound event */ - pmu_fd = perf_event_open(attr, 0, -1, -1, 0); + pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); if (pmu_fd < 0) { - printf("perf_event_open failed\n"); + printf("sys_perf_event_open failed\n"); return; } assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 1a1da7bddb93..f4fa6af22def 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -21,6 +21,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" static int pmu_fd; @@ -159,7 +160,7 @@ static void test_bpf_perf_event(void) }; int key = 0; - pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); assert(pmu_fd >= 0); assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 179297cb4d35..ca7874ed77f4 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -10,6 +10,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" #define SAMPLE_PERIOD 0x7fffffffffffffffULL @@ -30,7 +31,7 @@ static void test_bpf_perf_event(void) }; for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); + pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); if (pmu_fd[i] < 0) { printf("event syscall failed\n"); goto exit; -- cgit v1.2.3-59-g8ed1b From 9899694a7f67714216665b87318eb367e2c5c901 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Dec 2016 18:46:20 -0800 Subject: samples/bpf: Move open_raw_sock to separate header This function was declared in libbpf.c and was the only remaining function in this library, but has nothing to do with BPF. Shift it out into a new header, sock_example.h, and include it from the relevant samples. Signed-off-by: Joe Stringer Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Wang Nan Link: http://lkml.kernel.org/r/20161209024620.31660-8-joe@ovn.org Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/Makefile | 2 +- samples/bpf/fds_example.c | 1 + samples/bpf/libbpf.c | 36 ------------------------------------ samples/bpf/libbpf.h | 3 --- samples/bpf/sock_example.c | 1 + samples/bpf/sock_example.h | 35 +++++++++++++++++++++++++++++++++++ samples/bpf/sockex1_user.c | 1 + samples/bpf/sockex2_user.c | 1 + samples/bpf/sockex3_user.c | 1 + 9 files changed, 41 insertions(+), 40 deletions(-) delete mode 100644 samples/bpf/libbpf.c create mode 100644 samples/bpf/sock_example.h diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5a73f5a7ace1..f01b66f277b0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -36,7 +36,7 @@ hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel # Libbpf dependencies -LIBBPF := libbpf.o ../../tools/lib/bpf/bpf.o +LIBBPF := ../../tools/lib/bpf/bpf.o test_lru_dist-objs := test_lru_dist.o $(LIBBPF) sock_example-objs := sock_example.o $(LIBBPF) diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index a5cddc99cccd..e29bd52ff9e8 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -14,6 +14,7 @@ #include "bpf_load.h" #include "libbpf.h" +#include "sock_example.h" #define BPF_F_PIN (1 << 0) #define BPF_F_GET (1 << 1) diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c deleted file mode 100644 index bee473a494f1..000000000000 --- a/samples/bpf/libbpf.c +++ /dev/null @@ -1,36 +0,0 @@ -/* eBPF mini library */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "libbpf.h" - -int open_raw_sock(const char *name) -{ - struct sockaddr_ll sll; - int sock; - - sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); - if (sock < 0) { - printf("cannot create raw socket\n"); - return -1; - } - - memset(&sll, 0, sizeof(sll)); - sll.sll_family = AF_PACKET; - sll.sll_ifindex = if_nametoindex(name); - sll.sll_protocol = htons(ETH_P_ALL); - if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { - printf("bind to %s: %s\n", name, strerror(errno)); - close(sock); - return -1; - } - - return sock; -} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 09aedc320009..3705fba453a0 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -185,7 +185,4 @@ struct bpf_insn; .off = 0, \ .imm = 0 }) -/* create RAW socket and bind to interface 'name' */ -int open_raw_sock(const char *name); - #endif diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 5546f8aac37e..6fc6e193ef1b 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -27,6 +27,7 @@ #include #include #include "libbpf.h" +#include "sock_example.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h new file mode 100644 index 000000000000..09f7fe7e5fd7 --- /dev/null +++ b/samples/bpf/sock_example.h @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" + +static inline int open_raw_sock(const char *name) +{ + struct sockaddr_ll sll; + int sock; + + sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); + if (sock < 0) { + printf("cannot create raw socket\n"); + return -1; + } + + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = if_nametoindex(name); + sll.sll_protocol = htons(ETH_P_ALL); + if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { + printf("bind to %s: %s\n", name, strerror(errno)); + close(sock); + return -1; + } + + return sock; +} diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 9454448bf198..6cd2feb3e9b3 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -3,6 +3,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "sock_example.h" #include #include diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 6a40600d5a83..0e0207c90841 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -3,6 +3,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "sock_example.h" #include #include #include diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 9099c4255f23..b5524d417eb5 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -3,6 +3,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "sock_example.h" #include #include #include -- cgit v1.2.3-59-g8ed1b From daa864b8f8e34477bde817f26d736d89dc6032f3 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 22 Dec 2016 00:29:26 -0800 Subject: perf/x86/pebs: Fix handling of PEBS buffer overflows This patch solves a race condition between PEBS and the PMU handler. In case multiple PEBS events are sampled at the same time, it is possible to have GLOBAL_STATUS bit 62 set indicating PEBS buffer overflow and also seeing at most 3 PEBS counters having their bits set in the status register. This is a sign that there was at least one PEBS record pending at the time of the PMU interrupt. PEBS counters must only be processed via the drain_pebs() calls, and not via the regular sample processing loop coming after that the function, otherwise phony regular samples may be generated in the sampling buffer not marked with the EXACT tag. Another possibility is to have one PEBS event and at least one non-PEBS event whic hoverflows while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will not be set, yet the overflow status bit for the PEBS counter will be on Skylake. To avoid this problem, we systematically ignore the PEBS-enabled counters from the GLOBAL_STATUS mask and we always process PEBS events via drain_pebs(). The problem manifested itself by having non-exact samples when sampling only PEBS events, i.e., the PERF_SAMPLE_RECORD would not have the EXACT flag set. Note that this problem is only present on Skylake processor. This fix is harmless on older processors. Reported-by: Peter Zijlstra Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1482395366-8992-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index cb8522290e6a..86138267b68a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2110,6 +2110,27 @@ again: GLOBAL_STATUS_LBRS_FROZEN); if (!status) goto done; + /* + * In case multiple PEBS events are sampled at the same time, + * it is possible to have GLOBAL_STATUS bit 62 set indicating + * PEBS buffer overflow and also seeing at most 3 PEBS counters + * having their bits set in the status register. This is a sign + * that there was at least one PEBS record pending at the time + * of the PMU interrupt. PEBS counters must only be processed + * via the drain_pebs() calls and not via the regular sample + * processing loop coming after that the function, otherwise + * phony regular samples may be generated in the sampling buffer + * not marked with the EXACT tag. Another possibility is to have + * one PEBS event and at least one non-PEBS event whic hoverflows + * while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will + * not be set, yet the overflow status bit for the PEBS counter will + * be on Skylake. + * + * To avoid this problem, we systematically ignore the PEBS-enabled + * counters from the GLOBAL_STATUS mask and we always process PEBS + * events via drain_pebs(). + */ + status &= ~cpuc->pebs_enabled; /* * PEBS overflow sets bit 62 in the global status register @@ -2117,15 +2138,6 @@ again: if (__test_and_clear_bit(62, (unsigned long *)&status)) { handled++; x86_pmu.drain_pebs(regs); - /* - * There are cases where, even though, the PEBS ovfl bit is set - * in GLOBAL_OVF_STATUS, the PEBS events may also have their - * overflow bits set for their counters. We must clear them - * here because they have been processed as exact samples in - * the drain_pebs() routine. They must not be processed again - * in the for_each_bit_set() loop for regular samples below. - */ - status &= ~cpuc->pebs_enabled; status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; } -- cgit v1.2.3-59-g8ed1b From 1134c2b5cb840409ffd966d8c2a9468f64e6a494 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Nov 2016 16:51:53 +0100 Subject: perf/x86: Fix overlap counter scheduling bug Jiri reported the overlap scheduling exceeding its max stack. Looking at the constraint that triggered this, it turns out the overlap marker isn't needed. The comment with EVENT_CONSTRAINT_OVERLAP states: "This is the case if the counter mask of such an event is not a subset of any other counter mask of a constraint with an equal or higher weight". Esp. that latter part is of interest here I think, our overlapping mask is 0x0e, that has 3 bits set and is the highest weight mask in on the PMU, therefore it will be placed last. Can we still create a scenario where we would need to rewind that? The scenario for AMD Fam15h is we're having masks like: 0x3F -- 111111 0x38 -- 111000 0x07 -- 000111 0x09 -- 001001 And we mark 0x09 as overlapping, because it is not a direct subset of 0x38 or 0x07 and has less weight than either of those. This means we'll first try and place the 0x09 event, then try and place 0x38/0x07 events. Now imagine we have: 3 * 0x07 + 0x09 and the initial pick for the 0x09 event is counter 0, then we'll fail to place all 0x07 events. So we'll pop back, try counter 4 for the 0x09 event, and then re-try all 0x07 events, which will now work. The masks on the PMU in question are: 0x01 - 0001 0x03 - 0011 0x0e - 1110 0x0c - 1100 But since all the masks that have overlap (0xe -> {0xc,0x3}) and (0x3 -> 0x1) are of heavier weight, it should all work out. Reported-by: Jiri Olsa Tested-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Liang Kan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20161109155153.GQ3142@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 272427700d48..e6832be714bc 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -669,7 +669,7 @@ static struct event_constraint snbep_uncore_cbox_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), - EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), + UNCORE_EVENT_CONSTRAINT(0x1f, 0xe), UNCORE_EVENT_CONSTRAINT(0x21, 0x3), UNCORE_EVENT_CONSTRAINT(0x23, 0x3), UNCORE_EVENT_CONSTRAINT(0x31, 0x3), -- cgit v1.2.3-59-g8ed1b From 0e6758e8231d5905c2e267566e9bd679a68a7b00 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Honour 'comm_width' when aligning the headers Current default value is 20, but that may change in the future, so make places where we have 20 hardcoded use 'comm_width'. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c1c07bfe132c..020f9c9cef7b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1817,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched) printf(" "); } - printf(" %-20s %9s %9s %9s", + printf(" %-*s %9s %9s %9s", comm_width, "task name", "wait time", "sch delay", "run time"); printf("\n"); @@ -1830,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); + printf(" %-*s %9s %9s %9s\n", comm_width, + "[tid/pid]", "(msec)", "(msec)", "(msec)"); /* * separator @@ -1840,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %.*s ", ncpus, graph_dotted_line); - printf(" %.20s %.9s %.9s %.9s", + printf(" %.*s %.9s %.9s %.9s", comm_width, graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line); -- cgit v1.2.3-59-g8ed1b From 9b8087d72086b249ff744cee237ad12cc5e9255d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Enlarge default 'comm_width' Current default value is 20 but it's easily changed to a bigger value as task has a long name and different tid and pid. And it makes the output not aligned. So change it to have a large value as summary shows. Committer notes: Before: # perf sched record ^C # perf sched timehist 40602.770537 [0001] rcuos/2[29] 7.970 0.002 0.020 40602.771512 [0003] 0.003 0.000 0.986 40602.771586 [0001] 0.020 0.000 1.049 40602.771606 [0001] qemu-system-x86[3593/3510] 0.000 0.002 0.020 40602.771629 [0003] qemu-system-x86[3510] 0.000 0.003 0.116 40602.771776 [0000] 0.001 0.000 1.892 After: # perf sched timehist 40602.770537 [0001] rcuos/2[29] 7.970 0.002 0.020 40602.771512 [0003] 0.003 0.000 0.986 40602.771586 [0001] 0.020 0.000 1.049 40602.771606 [0001] qemu-system-x86[3593/3510] 0.000 0.002 0.020 40602.771629 [0003] qemu-system-x86[3510] 0.000 0.003 0.116 Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 020f9c9cef7b..c65f16cbae62 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1775,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) return r->last_time[cpu]; } -static int comm_width = 20; +static int comm_width = 30; static char *timehist_get_commstr(struct thread *thread) { -- cgit v1.2.3-59-g8ed1b From 4fa0d1aa2711a5053fb2422331bdf6bce99b5f87 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Remove hardcoded 'comm_width' check at print_summary Now that the default 'comm_width' value is 30, no need to check that at print_summary, Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c65f16cbae62..5052caa91caa 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2627,9 +2627,6 @@ static void timehist_print_summary(struct perf_sched *sched, memset(&totals, 0, sizeof(totals)); - if (comm_width < 30) - comm_width = 30; - if (sched->idle_hist) { printf("\nIdle-time summary\n"); printf("%*s parent sched-out ", comm_width, "comm"); -- cgit v1.2.3-59-g8ed1b From bdd75729e5d279d734e8d3fb41ef4818ac1598ab Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:49 +0900 Subject: perf sched timehist: Fix invalid period calculation When --time option is given with a value outside recorded time, the last sample time (tprev) was set to that value and run time calculation might be incorrect. This is a problem of the first samples for each cpus since it would skip the runtime update when tprev is 0. But with --time option it had non-zero (which is invalid) value so the calculation is also incorrect. For example, let's see the followging: $ perf sched timehist time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) --------------- ------ ------------------------------ --------- --------- --------- 3195.968367 [0003] 0.000 0.000 0.000 3195.968386 [0002] Timer[4306/4277] 0.000 0.000 0.018 3195.968397 [0002] Web Content[4277] 0.000 0.000 0.000 3195.968595 [0001] JS Helper[4302/4277] 0.000 0.000 0.000 3195.969217 [0000] 0.000 0.000 0.621 3195.969251 [0001] kworker/1:1H[291] 0.000 0.000 0.033 The sample starts at 3195.968367 but when I gave a time interval from 3194 to 3196 (in sec) it will calculate the whole 2 second as runtime. In below, 2 cpus accounted it as runtime, other 2 cpus accounted it as idle time. Before: $ perf sched timehist --time 3194,3196 -s | tail Idle stats: CPU 0 idle for 1995.991 msec CPU 1 idle for 20.793 msec CPU 2 idle for 30.191 msec CPU 3 idle for 1999.852 msec Total number of unique tasks: 23 Total number of context switches: 128 Total run time (msec): 3724.940 After: $ perf sched timehist --time 3194,3196 -s | tail Idle stats: CPU 0 idle for 10.811 msec CPU 1 idle for 20.793 msec CPU 2 idle for 30.191 msec CPU 3 idle for 18.337 msec Total number of unique tasks: 23 Total number of context switches: 128 Total run time (msec): 18.139 Committer notes: Further testing: Before: Idle stats: CPU 0 idle for 229.785 msec CPU 1 idle for 937.944 msec CPU 2 idle for 188.931 msec CPU 3 idle for 986.185 msec After: # perf sched timehist --time 40602,40603 -s | tail Idle stats: CPU 0 idle for 229.785 msec CPU 1 idle for 175.407 msec CPU 2 idle for 188.931 msec CPU 3 idle for 223.657 msec Total number of unique tasks: 68 Total number of context switches: 814 Total run time (msec): 97.688 # for cpu in `seq 0 3` ; do echo -n "CPU $cpu idle for " ; perf sched timehist --time 40602,40603 | grep "\[000${cpu}\].*\" | tr -s ' ' | cut -d' ' -f7 | awk '{entries++ ; s+=$1} END {print s " msec (entries: " entries ")"}' ; done CPU 0 idle for 229.721 msec (entries: 123) CPU 1 idle for 175.381 msec (entries: 65) CPU 2 idle for 188.903 msec (entries: 56) CPU 3 idle for 223.61 msec (entries: 102) Difference due to the idle stats being accounted at nanoseconds precision while the entries in 'perf sched timehist' are trucated at msec.usec. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 853b74071110 ("perf sched timehist: Add option to specify time window of interest") Link: http://lkml.kernel.org/r/20161222060350.17655-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5052caa91caa..d53e706a6f17 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2405,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, if (ptime->start && ptime->start > t) goto out; - if (ptime->start > tprev) + if (tprev && ptime->start > tprev) tprev = ptime->start; /* -- cgit v1.2.3-59-g8ed1b