aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/bench
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2016-10-24 20:42:42 +0200
committer: Ingo Molnar <mingo@kernel.org> 2016-10-24 20:42:42 +0200
commit: 76e2d2617d767c445498c4c4b1162eb2201cdd77 (patch)
tree: e03764dba70ea6993366e25d16e1735b2d40cd26 /tools/perf/bench
parent: Merge tag 'perf-c2c-for-mingo-20161021' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core (diff)
parent: perf coresight: Removing miscellaneous debug output (diff)
download: linux-dev-76e2d2617d767c445498c4c4b1162eb2201cdd77.tar.xz
linux-dev-76e2d2617d767c445498c4c4b1162eb2201cdd77.zip
Merge tag 'perf-core-for-mingo-20161024' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Dynamically change verbosity level by pressing 'V' in the 'perf top/report' hists TUI browser (Alexis Berlemont) - Implement 'perf trace --delay' in the same fashion as in 'perf record --delay', to skip sampling workload initialization events (Alexis Berlemont) - Make vendor named events case insensitive in 'perf list', i.e. 'perf list LONGEST_LAT' works just the same as 'perf list longest_lat' (Andi Kleen) - Show instruction bytes and length in 'perf script' for Intel PT and BTS (Andi Kleen, Adrian Hunter) E.g: % perf record -e intel_pt// foo % perf script --itrace=i0ns -F ip,insn,insnlen ffffffff8101232f ilen: 5 insn: 0f 1f 44 00 00 ffffffff81012334 ilen: 1 insn: 5b ffffffff81012335 ilen: 1 insn: 5d ffffffff81012336 ilen: 1 insn: c3 ffffffff810123e3 ilen: 1 insn: 5b ffffffff810123e4 ilen: 2 insn: 41 5c ffffffff810123e6 ilen: 1 insn: 5d ffffffff810123e7 ilen: 1 insn: c3 ffffffff810124a6 ilen: 2 insn: 31 c0 ffffffff810124a8 ilen: 9 insn: 41 83 bc 24 a8 01 00 00 01 ffffffff810124b1 ilen: 2 insn: 75 87 - Allow enabling the perf_event_attr.branch_type attribute member: (Andi Kleen) perf record -e sched:sched_switch,cpu/cpu-cycles,branch_type=any/ ... 
- Add unwinding support for jitdump (Stefano Sanfilippo) Fixes: - Use raw_syscall:sys_enter timestamp in 'perf trace' (Arnaldo Carvalho de Melo) Infrastructure: - Allow jitdump to be built without libdwarf (Maciej Debski) - Sync x86's syscall table tools/ copy (Arnaldo Carvalho de Melo) - Fixes to avoid calling die() in library functions already propagating other errors (Arnaldo Carvalho de Melo) - Improvements to allow libtraceevent to be properly installed in distro packages (Jiri Olsa) - Removing coresight miscellaneous debug output (Mathieu Poirier) - Cache align the 'perf bench futex' worker struct (Sebastian Andrzej Siewior) Documentation: - Minor improvements on the documentation of event parameters (Andi Kleen) - Add jitdump format specification document (Stephane Eranian) Spelling fixes: - Fix typo "No enough" to "Not enough" (Alexander Alemayhu) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- tools/perf/bench/futex-hash.c | 5
-rw-r--r-- tools/perf/bench/mem-functions.c | 77
2 files changed, 34 insertions, 48 deletions
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 8024cd5febd2..d9e5e80bb4d0 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -39,12 +39,15 @@ static unsigned int threads_starting;
static struct stats throughput_stats;
static pthread_cond_t thread_parent, thread_worker;
+#define SMP_CACHE_BYTES 256
+#define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES)))
+
struct worker {
int tid;
u_int32_t *futex;
pthread_t thread;
unsigned long ops;
-};
+} __cacheline_aligned;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index c684910e5a48..52504a83b5a1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -106,9 +106,10 @@ static double timeval2double(struct timeval *ts)
struct bench_mem_info {
const struct function *functions;
- u64 (*do_cycles)(const struct function *r, size_t size);
- double (*do_gettimeofday)(const struct function *r, size_t size);
+ u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
+ double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
const char *const *usage;
+ bool alloc_src;
};
static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
@@ -116,16 +117,26 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t
const struct function *r = &info->functions[r_idx];
double result_bps = 0.0;
u64 result_cycles = 0;
+ void *src = NULL, *dst = zalloc(size);
printf("# function '%s' (%s)\n", r->name, r->desc);
+ if (dst == NULL)
+ goto out_alloc_failed;
+
+ if (info->alloc_src) {
+ src = zalloc(size);
+ if (src == NULL)
+ goto out_alloc_failed;
+ }
+
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s bytes ...\n\n", size_str);
if (use_cycles) {
- result_cycles = info->do_cycles(r, size);
+ result_cycles = info->do_cycles(r, size, src, dst);
} else {
- result_bps = info->do_gettimeofday(r, size);
+ result_bps = info->do_gettimeofday(r, size, src, dst);
}
switch (bench_format) {
@@ -149,6 +160,14 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t
BUG_ON(1);
break;
}
+
+out_free:
+ free(src);
+ free(dst);
+ return;
+out_alloc_failed:
+ printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
+ goto out_free;
}
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
@@ -201,28 +220,14 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 0;
}
-static void memcpy_alloc_mem(void **dst, void **src, size_t size)
-{
- *dst = zalloc(size);
- if (!*dst)
- die("memory allocation failed - maybe size is too large?\n");
-
- *src = zalloc(size);
- if (!*src)
- die("memory allocation failed - maybe size is too large?\n");
-
- /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
- memset(*src, 0, size);
-}
-
-static u64 do_memcpy_cycles(const struct function *r, size_t size)
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
- void *src = NULL, *dst = NULL;
memcpy_t fn = r->fn.memcpy;
int i;
- memcpy_alloc_mem(&dst, &src, size);
+ /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
+ memset(src, 0, size);
/*
* We prefault the freshly allocated memory range here,
@@ -235,20 +240,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size)
fn(dst, src, size);
cycle_end = get_cycles();
- free(src);
- free(dst);
return cycle_end - cycle_start;
}
-static double do_memcpy_gettimeofday(const struct function *r, size_t size)
+static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
{
struct timeval tv_start, tv_end, tv_diff;
memcpy_t fn = r->fn.memcpy;
- void *src = NULL, *dst = NULL;
int i;
- memcpy_alloc_mem(&dst, &src, size);
-
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
@@ -262,9 +262,6 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size)
timersub(&tv_end, &tv_start, &tv_diff);
- free(src);
- free(dst);
-
return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
}
@@ -294,27 +291,18 @@ int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unu
.do_cycles = do_memcpy_cycles,
.do_gettimeofday = do_memcpy_gettimeofday,
.usage = bench_mem_memcpy_usage,
+ .alloc_src = true,
};
return bench_mem_common(argc, argv, &info);
}
-static void memset_alloc_mem(void **dst, size_t size)
-{
- *dst = zalloc(size);
- if (!*dst)
- die("memory allocation failed - maybe size is too large?\n");
-}
-
-static u64 do_memset_cycles(const struct function *r, size_t size)
+static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
memset_t fn = r->fn.memset;
- void *dst = NULL;
int i;
- memset_alloc_mem(&dst, size);
-
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
@@ -326,19 +314,15 @@ static u64 do_memset_cycles(const struct function *r, size_t size)
fn(dst, i, size);
cycle_end = get_cycles();
- free(dst);
return cycle_end - cycle_start;
}
-static double do_memset_gettimeofday(const struct function *r, size_t size)
+static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
{
struct timeval tv_start, tv_end, tv_diff;
memset_t fn = r->fn.memset;
- void *dst = NULL;
int i;
- memset_alloc_mem(&dst, size);
-
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
@@ -352,7 +336,6 @@ static double do_memset_gettimeofday(const struct function *r, size_t size)
timersub(&tv_end, &tv_start, &tv_diff);
- free(dst);
return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
}