aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/bench
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/perf/bench/Build1
-rw-r--r--tools/perf/bench/bench.h2
-rw-r--r--tools/perf/bench/breakpoint.c244
-rw-r--r--tools/perf/bench/epoll-ctl.c62
-rw-r--r--tools/perf/bench/epoll-wait.c60
-rw-r--r--tools/perf/bench/evlist-open-close.c4
-rw-r--r--tools/perf/bench/futex-hash.c61
-rw-r--r--tools/perf/bench/futex-lock-pi.c56
-rw-r--r--tools/perf/bench/futex-requeue.c56
-rw-r--r--tools/perf/bench/futex-wake-parallel.c56
-rw-r--r--tools/perf/bench/futex-wake.c57
-rw-r--r--tools/perf/bench/numa.c233
12 files changed, 635 insertions, 257 deletions
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 61d45fcb4057..6b6155a8ad09 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -14,6 +14,7 @@ perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o
perf-y += inject-buildid.o
perf-y += evlist-open-close.o
+perf-y += breakpoint.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index b3480bc33fe8..6cefb4315d75 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -49,6 +49,8 @@ int bench_synthesize(int argc, const char **argv);
int bench_kallsyms_parse(int argc, const char **argv);
int bench_inject_build_id(int argc, const char **argv);
int bench_evlist_open_close(int argc, const char **argv);
+int bench_breakpoint_thread(int argc, const char **argv);
+int bench_breakpoint_enable(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/breakpoint.c b/tools/perf/bench/breakpoint.c
new file mode 100644
index 000000000000..41385f89ffc7
--- /dev/null
+++ b/tools/perf/bench/breakpoint.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <linux/time64.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+
+struct {
+ unsigned int nbreakpoints;
+ unsigned int nparallel;
+ unsigned int nthreads;
+} thread_params = {
+ .nbreakpoints = 1,
+ .nparallel = 1,
+ .nthreads = 1,
+};
+
+static const struct option thread_options[] = {
+ OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints,
+ "Specify amount of breakpoints"),
+ OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"),
+ OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"),
+ OPT_END()
+};
+
+static const char * const thread_usage[] = {
+ "perf bench breakpoint thread <options>",
+ NULL
+};
+
+struct breakpoint {
+ int fd;
+ char watched;
+};
+
+static int breakpoint_setup(void *addr)
+{
+ struct perf_event_attr attr = { .size = 0, };
+
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.size = sizeof(attr);
+ attr.inherit = 1;
+ attr.exclude_kernel = 1;
+ attr.exclude_hv = 1;
+ attr.bp_addr = (unsigned long)addr;
+ attr.bp_type = HW_BREAKPOINT_RW;
+ attr.bp_len = HW_BREAKPOINT_LEN_1;
+ return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+}
+
+static void *passive_thread(void *arg)
+{
+ unsigned int *done = (unsigned int *)arg;
+
+ while (!__atomic_load_n(done, __ATOMIC_RELAXED))
+ futex_wait(done, 0, NULL, 0);
+ return NULL;
+}
+
+static void *active_thread(void *arg)
+{
+ unsigned int *done = (unsigned int *)arg;
+
+ while (!__atomic_load_n(done, __ATOMIC_RELAXED));
+ return NULL;
+}
+
+static void *breakpoint_thread(void *arg)
+{
+ unsigned int i, done;
+ int *repeat = (int *)arg;
+ pthread_t *threads;
+
+ threads = calloc(thread_params.nthreads, sizeof(threads[0]));
+ if (!threads)
+ exit((perror("calloc"), EXIT_FAILURE));
+
+ while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) {
+ done = 0;
+ for (i = 0; i < thread_params.nthreads; i++) {
+ if (pthread_create(&threads[i], NULL, passive_thread, &done))
+ exit((perror("pthread_create"), EXIT_FAILURE));
+ }
+ __atomic_store_n(&done, 1, __ATOMIC_RELAXED);
+ futex_wake(&done, thread_params.nthreads, 0);
+ for (i = 0; i < thread_params.nthreads; i++)
+ pthread_join(threads[i], NULL);
+ }
+ free(threads);
+ return NULL;
+}
+
+// The benchmark creates nbreakpoints inheritable breakpoints,
+// then starts nparallel threads which create and join bench_repeat batches of nthreads threads.
+int bench_breakpoint_thread(int argc, const char **argv)
+{
+ unsigned int i, result_usec;
+ int repeat = bench_repeat;
+ struct breakpoint *breakpoints;
+ pthread_t *parallel;
+ struct timeval start, stop, diff;
+
+ if (parse_options(argc, argv, thread_options, thread_usage, 0)) {
+ usage_with_options(thread_usage, thread_options);
+ exit(EXIT_FAILURE);
+ }
+ breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0]));
+ parallel = calloc(thread_params.nparallel, sizeof(parallel[0]));
+ if (!breakpoints || !parallel)
+ exit((perror("calloc"), EXIT_FAILURE));
+
+ for (i = 0; i < thread_params.nbreakpoints; i++) {
+ breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
+ if (breakpoints[i].fd == -1)
+ exit((perror("perf_event_open"), EXIT_FAILURE));
+ }
+ gettimeofday(&start, NULL);
+ for (i = 0; i < thread_params.nparallel; i++) {
+ if (pthread_create(&parallel[i], NULL, breakpoint_thread, &repeat))
+ exit((perror("pthread_create"), EXIT_FAILURE));
+ }
+ for (i = 0; i < thread_params.nparallel; i++)
+ pthread_join(parallel[i], NULL);
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+ for (i = 0; i < thread_params.nbreakpoints; i++)
+ close(breakpoints[i].fd);
+ free(parallel);
+ free(breakpoints);
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n",
+ bench_repeat, thread_params.nbreakpoints, thread_params.nparallel);
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / bench_repeat / thread_params.nthreads);
+ printf(" %14lf usecs/op/cpu\n",
+ (double)result_usec / bench_repeat /
+ thread_params.nthreads * thread_params.nparallel);
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ break;
+ default:
+ fprintf(stderr, "Unknown format: %d\n", bench_format);
+ exit(EXIT_FAILURE);
+ }
+ return 0;
+}
+
+struct {
+ unsigned int npassive;
+ unsigned int nactive;
+} enable_params = {
+ .nactive = 0,
+ .npassive = 0,
+};
+
+static const struct option enable_options[] = {
+ OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"),
+ OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"),
+ OPT_END()
+};
+
+static const char * const enable_usage[] = {
+ "perf bench breakpoint enable <options>",
+ NULL
+};
+
+// The benchmark creates an inheritable breakpoint,
+// then starts npassive threads that block and nactive threads that actively spin
+// and then disables and enables the breakpoint bench_repeat times.
+int bench_breakpoint_enable(int argc, const char **argv)
+{
+ unsigned int i, nthreads, result_usec, done = 0;
+ char watched;
+ int fd;
+ pthread_t *threads;
+ struct timeval start, stop, diff;
+
+ if (parse_options(argc, argv, enable_options, enable_usage, 0)) {
+ usage_with_options(enable_usage, enable_options);
+ exit(EXIT_FAILURE);
+ }
+ fd = breakpoint_setup(&watched);
+ if (fd == -1)
+ exit((perror("perf_event_open"), EXIT_FAILURE));
+ nthreads = enable_params.npassive + enable_params.nactive;
+ threads = calloc(nthreads, sizeof(threads[0]));
+ if (!threads)
+ exit((perror("calloc"), EXIT_FAILURE));
+
+ for (i = 0; i < nthreads; i++) {
+ if (pthread_create(&threads[i], NULL,
+ i < enable_params.npassive ? passive_thread : active_thread, &done))
+ exit((perror("pthread_create"), EXIT_FAILURE));
+ }
+ usleep(10000); // let the threads block
+ gettimeofday(&start, NULL);
+ for (i = 0; i < bench_repeat; i++) {
+ if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
+ exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE));
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0))
+ exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE));
+ }
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+ __atomic_store_n(&done, 1, __ATOMIC_RELAXED);
+ futex_wake(&done, enable_params.npassive, 0);
+ for (i = 0; i < nthreads; i++)
+ pthread_join(threads[i], NULL);
+ free(threads);
+ close(fd);
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n",
+ bench_repeat, enable_params.npassive, enable_params.nactive);
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat);
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ break;
+ default:
+ fprintf(stderr, "Unknown format: %d\n", bench_format);
+ exit(EXIT_FAILURE);
+ }
+ return 0;
+}
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index ddaca75c3bc0..521d1ff97b06 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -23,6 +23,7 @@
#include <sys/eventfd.h>
#include <perf/cpumap.h>
+#include "../util/mutex.h"
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"
@@ -58,10 +59,10 @@ static unsigned int nested = 0;
/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;
-static pthread_mutex_t thread_lock;
+static struct mutex thread_lock;
static unsigned int threads_starting;
static struct stats all_stats[EPOLL_NR_OPS];
-static pthread_cond_t thread_parent, thread_worker;
+static struct cond thread_parent, thread_worker;
struct worker {
int tid;
@@ -106,7 +107,7 @@ static void nest_epollfd(void)
printinfo("Nesting level(s): %d\n", nested);
epollfdp = calloc(nested, sizeof(int));
- if (!epollfd)
+ if (!epollfdp)
err(EXIT_FAILURE, "calloc");
for (i = 0; i < nested; i++) {
@@ -174,12 +175,12 @@ static void *workerfn(void *arg)
struct timespec ts = { .tv_sec = 0,
.tv_nsec = 250 };
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
- pthread_cond_signal(&thread_parent);
- pthread_cond_wait(&thread_worker, &thread_lock);
- pthread_mutex_unlock(&thread_lock);
+ cond_signal(&thread_parent);
+ cond_wait(&thread_worker, &thread_lock);
+ mutex_unlock(&thread_lock);
/* Let 'em loose */
do {
@@ -222,13 +223,20 @@ static void init_fdmaps(struct worker *w, int pct)
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
pthread_attr_t thread_attr, *attrp = NULL;
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i, j;
int ret = 0;
+ int nrcpus;
+ size_t size;
if (!noaffinity)
pthread_attr_init(&thread_attr);
+ nrcpus = perf_cpu_map__nr(cpu);
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < nthreads; i++) {
struct worker *w = &worker[i];
@@ -252,22 +260,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
init_fdmaps(w, 50);
if (!noaffinity) {
- CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
+ size, cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
- if (ret)
+ ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
attrp = &thread_attr;
}
ret = pthread_create(&w->thread, attrp, workerfn,
(void *)(struct worker *) w);
- if (ret)
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
if (!noaffinity)
pthread_attr_destroy(&thread_attr);
@@ -333,7 +347,7 @@ int bench_epoll_ctl(int argc, const char **argv)
/* default to the number of CPUs */
if (!nthreads)
- nthreads = cpu->nr;
+ nthreads = perf_cpu_map__nr(cpu);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -354,9 +368,9 @@ int bench_epoll_ctl(int argc, const char **argv)
for (i = 0; i < EPOLL_NR_OPS; i++)
init_stats(&all_stats[i]);
- pthread_mutex_init(&thread_lock, NULL);
- pthread_cond_init(&thread_parent, NULL);
- pthread_cond_init(&thread_worker, NULL);
+ mutex_init(&thread_lock);
+ cond_init(&thread_parent);
+ cond_init(&thread_worker);
threads_starting = nthreads;
@@ -364,11 +378,11 @@ int bench_epoll_ctl(int argc, const char **argv)
do_threads(worker, cpu);
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
while (threads_starting)
- pthread_cond_wait(&thread_parent, &thread_lock);
- pthread_cond_broadcast(&thread_worker);
- pthread_mutex_unlock(&thread_lock);
+ cond_wait(&thread_parent, &thread_lock);
+ cond_broadcast(&thread_worker);
+ mutex_unlock(&thread_lock);
sleep(nsecs);
toggle_done(0, NULL, NULL);
@@ -381,9 +395,9 @@ int bench_epoll_ctl(int argc, const char **argv)
}
/* cleanup & report results */
- pthread_cond_destroy(&thread_parent);
- pthread_cond_destroy(&thread_worker);
- pthread_mutex_destroy(&thread_lock);
+ cond_destroy(&thread_parent);
+ cond_destroy(&thread_worker);
+ mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
unsigned long t[EPOLL_NR_OPS];
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 79d13dbc0a47..c1cdf03c075d 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -79,6 +79,7 @@
#include <perf/cpumap.h>
#include "../util/stat.h"
+#include "../util/mutex.h"
#include <subcmd/parse-options.h>
#include "bench.h"
@@ -109,10 +110,10 @@ static bool multiq; /* use an epoll instance per thread */
/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;
-static pthread_mutex_t thread_lock;
+static struct mutex thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
-static pthread_cond_t thread_parent, thread_worker;
+static struct cond thread_parent, thread_worker;
struct worker {
int tid;
@@ -189,12 +190,12 @@ static void *workerfn(void *arg)
int to = nonblocking? 0 : -1;
int efd = multiq ? w->epollfd : epollfd;
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
- pthread_cond_signal(&thread_parent);
- pthread_cond_wait(&thread_worker, &thread_lock);
- pthread_mutex_unlock(&thread_lock);
+ cond_signal(&thread_parent);
+ cond_wait(&thread_worker, &thread_lock);
+ mutex_unlock(&thread_lock);
do {
/*
@@ -291,9 +292,11 @@ static void print_summary(void)
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
pthread_attr_t thread_attr, *attrp = NULL;
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i, j;
int ret = 0, events = EPOLLIN;
+ int nrcpus;
+ size_t size;
if (oneshot)
events |= EPOLLONESHOT;
@@ -306,6 +309,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity)
pthread_attr_init(&thread_attr);
+ nrcpus = perf_cpu_map__nr(cpu);
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < nthreads; i++) {
struct worker *w = &worker[i];
@@ -341,22 +349,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
}
if (!noaffinity) {
- CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
+ size, cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
- if (ret)
+ ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
attrp = &thread_attr;
}
ret = pthread_create(&w->thread, attrp, workerfn,
(void *)(struct worker *) w);
- if (ret)
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
if (!noaffinity)
pthread_attr_destroy(&thread_attr);
@@ -452,7 +466,7 @@ int bench_epoll_wait(int argc, const char **argv)
/* default to the number of CPUs and leave one for the writer pthread */
if (!nthreads)
- nthreads = cpu->nr - 1;
+ nthreads = perf_cpu_map__nr(cpu) - 1;
worker = calloc(nthreads, sizeof(*worker));
if (!worker) {
@@ -472,9 +486,9 @@ int bench_epoll_wait(int argc, const char **argv)
getpid(), nthreads, oneshot ? " (EPOLLONESHOT semantics)": "", nfds, nsecs);
init_stats(&throughput_stats);
- pthread_mutex_init(&thread_lock, NULL);
- pthread_cond_init(&thread_parent, NULL);
- pthread_cond_init(&thread_worker, NULL);
+ mutex_init(&thread_lock);
+ cond_init(&thread_parent);
+ cond_init(&thread_worker);
threads_starting = nthreads;
@@ -482,11 +496,11 @@ int bench_epoll_wait(int argc, const char **argv)
do_threads(worker, cpu);
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
while (threads_starting)
- pthread_cond_wait(&thread_parent, &thread_lock);
- pthread_cond_broadcast(&thread_worker);
- pthread_mutex_unlock(&thread_lock);
+ cond_wait(&thread_parent, &thread_lock);
+ cond_broadcast(&thread_worker);
+ mutex_unlock(&thread_lock);
/*
* At this point the workers should be blocked waiting for read events
@@ -509,9 +523,9 @@ int bench_epoll_wait(int argc, const char **argv)
err(EXIT_FAILURE, "pthread_join");
/* cleanup & report results */
- pthread_cond_destroy(&thread_parent);
- pthread_cond_destroy(&thread_worker);
- pthread_mutex_destroy(&thread_lock);
+ cond_destroy(&thread_parent);
+ cond_destroy(&thread_worker);
+ mutex_destroy(&thread_lock);
/* sort the array back before reporting */
if (randomize)
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
index 482738e9bdad..5a27691469ed 100644
--- a/tools/perf/bench/evlist-open-close.c
+++ b/tools/perf/bench/evlist-open-close.c
@@ -71,7 +71,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist)
int cnt = 0;
evlist__for_each_entry(evlist, evsel)
- cnt += evsel->core.threads->nr * evsel->core.cpus->nr;
+ cnt += evsel->core.threads->nr * perf_cpu_map__nr(evsel->core.cpus);
return cnt;
}
@@ -151,7 +151,7 @@ static int bench_evlist_open_close__run(char *evstr)
init_stats(&time_stats);
- printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr);
+ printf(" Number of cpus:\t%d\n", perf_cpu_map__nr(evlist->core.user_requested_cpus));
printf(" Number of threads:\t%d\n", evlist->core.threads->nr);
printf(" Number of events:\t%d (%d fds)\n",
evlist->core.nr_entries, evlist__count_evsel_fds(evlist));
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fcdea3e44937..2005a3fa3026 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -23,6 +23,7 @@
#include <sys/mman.h>
#include <perf/cpumap.h>
+#include "../util/mutex.h"
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"
@@ -34,10 +35,10 @@ static bool done = false;
static int futex_flag = 0;
struct timeval bench__start, bench__end, bench__runtime;
-static pthread_mutex_t thread_lock;
+static struct mutex thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
-static pthread_cond_t thread_parent, thread_worker;
+static struct cond thread_parent, thread_worker;
struct worker {
int tid;
@@ -73,12 +74,12 @@ static void *workerfn(void *arg)
unsigned int i;
unsigned long ops = w->ops; /* avoid cacheline bouncing */
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
- pthread_cond_signal(&thread_parent);
- pthread_cond_wait(&thread_worker, &thread_lock);
- pthread_mutex_unlock(&thread_lock);
+ cond_signal(&thread_parent);
+ cond_wait(&thread_worker, &thread_lock);
+ mutex_unlock(&thread_lock);
do {
for (i = 0; i < params.nfutexes; i++, ops++) {
@@ -122,12 +123,14 @@ static void print_summary(void)
int bench_futex_hash(int argc, const char **argv)
{
int ret = 0;
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
struct sigaction act;
unsigned int i;
pthread_attr_t thread_attr;
struct worker *worker = NULL;
struct perf_cpu_map *cpu;
+ int nrcpus;
+ size_t size;
argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
if (argc) {
@@ -150,7 +153,7 @@ int bench_futex_hash(int argc, const char **argv)
}
if (!params.nthreads) /* default to the number of CPUs */
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
@@ -163,39 +166,49 @@ int bench_futex_hash(int argc, const char **argv)
getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime);
init_stats(&throughput_stats);
- pthread_mutex_init(&thread_lock, NULL);
- pthread_cond_init(&thread_parent, NULL);
- pthread_cond_init(&thread_worker, NULL);
+ mutex_init(&thread_lock);
+ cond_init(&thread_parent);
+ cond_init(&thread_worker);
threads_starting = params.nthreads;
pthread_attr_init(&thread_attr);
gettimeofday(&bench__start, NULL);
+
+ nrcpus = perf_cpu_map__nr(cpu);
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < params.nthreads; i++) {
worker[i].tid = i;
worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
if (!worker[i].futex)
goto errmem;
- CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_ZERO_S(size, cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
- if (ret)
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
+ ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
-
+ }
ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
(void *)(struct worker *) &worker[i]);
- if (ret)
+ if (ret) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
pthread_attr_destroy(&thread_attr);
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
while (threads_starting)
- pthread_cond_wait(&thread_parent, &thread_lock);
- pthread_cond_broadcast(&thread_worker);
- pthread_mutex_unlock(&thread_lock);
+ cond_wait(&thread_parent, &thread_lock);
+ cond_broadcast(&thread_worker);
+ mutex_unlock(&thread_lock);
sleep(params.runtime);
toggle_done(0, NULL, NULL);
@@ -207,9 +220,9 @@ int bench_futex_hash(int argc, const char **argv)
}
/* cleanup & report results */
- pthread_cond_destroy(&thread_parent);
- pthread_cond_destroy(&thread_worker);
- pthread_mutex_destroy(&thread_lock);
+ cond_destroy(&thread_parent);
+ cond_destroy(&thread_worker);
+ mutex_destroy(&thread_lock);
for (i = 0; i < params.nthreads; i++) {
unsigned long t = bench__runtime.tv_sec > 0 ?
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 137890f78e17..2d0417949727 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -8,6 +8,7 @@
#include <pthread.h>
#include <signal.h>
+#include "../util/mutex.h"
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include <linux/compiler.h>
@@ -34,10 +35,10 @@ static u_int32_t global_futex = 0;
static struct worker *worker;
static bool done = false;
static int futex_flag = 0;
-static pthread_mutex_t thread_lock;
+static struct mutex thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
-static pthread_cond_t thread_parent, thread_worker;
+static struct cond thread_parent, thread_worker;
static struct bench_futex_parameters params = {
.runtime = 10,
@@ -83,12 +84,12 @@ static void *workerfn(void *arg)
struct worker *w = (struct worker *) arg;
unsigned long ops = w->ops;
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
- pthread_cond_signal(&thread_parent);
- pthread_cond_wait(&thread_worker, &thread_lock);
- pthread_mutex_unlock(&thread_lock);
+ cond_signal(&thread_parent);
+ cond_wait(&thread_worker, &thread_lock);
+ mutex_unlock(&thread_lock);
do {
int ret;
@@ -120,11 +121,17 @@ static void *workerfn(void *arg)
static void create_threads(struct worker *w, pthread_attr_t thread_attr,
struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
+ int nrcpus = perf_cpu_map__nr(cpu);
+ size_t size;
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
for (i = 0; i < params.nthreads; i++) {
worker[i].tid = i;
@@ -135,15 +142,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
} else
worker[i].futex = &global_futex;
- CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
+ if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
int bench_futex_lock_pi(int argc, const char **argv)
@@ -173,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
@@ -186,9 +198,9 @@ int bench_futex_lock_pi(int argc, const char **argv)
getpid(), params.nthreads, params.runtime);
init_stats(&throughput_stats);
- pthread_mutex_init(&thread_lock, NULL);
- pthread_cond_init(&thread_parent, NULL);
- pthread_cond_init(&thread_worker, NULL);
+ mutex_init(&thread_lock);
+ cond_init(&thread_parent);
+ cond_init(&thread_worker);
threads_starting = params.nthreads;
pthread_attr_init(&thread_attr);
@@ -197,11 +209,11 @@ int bench_futex_lock_pi(int argc, const char **argv)
create_threads(worker, thread_attr, cpu);
pthread_attr_destroy(&thread_attr);
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
while (threads_starting)
- pthread_cond_wait(&thread_parent, &thread_lock);
- pthread_cond_broadcast(&thread_worker);
- pthread_mutex_unlock(&thread_lock);
+ cond_wait(&thread_parent, &thread_lock);
+ cond_broadcast(&thread_worker);
+ mutex_unlock(&thread_lock);
sleep(params.runtime);
toggle_done(0, NULL, NULL);
@@ -213,9 +225,9 @@ int bench_futex_lock_pi(int argc, const char **argv)
}
/* cleanup & report results */
- pthread_cond_destroy(&thread_parent);
- pthread_cond_destroy(&thread_worker);
- pthread_mutex_destroy(&thread_lock);
+ cond_destroy(&thread_parent);
+ cond_destroy(&thread_worker);
+ mutex_destroy(&thread_lock);
for (i = 0; i < params.nthreads; i++) {
unsigned long t = bench__runtime.tv_sec > 0 ?
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index f7a5ffebb940..69ad896f556c 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -15,6 +15,7 @@
#include <pthread.h>
#include <signal.h>
+#include "../util/mutex.h"
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include <linux/compiler.h>
@@ -34,8 +35,8 @@ static u_int32_t futex1 = 0, futex2 = 0;
static pthread_t *worker;
static bool done = false;
-static pthread_mutex_t thread_lock;
-static pthread_cond_t thread_parent, thread_worker;
+static struct mutex thread_lock;
+static struct cond thread_parent, thread_worker;
static struct stats requeuetime_stats, requeued_stats;
static unsigned int threads_starting;
static int futex_flag = 0;
@@ -82,12 +83,12 @@ static void *workerfn(void *arg __maybe_unused)
{
int ret;
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
- pthread_cond_signal(&thread_parent);
- pthread_cond_wait(&thread_worker, &thread_lock);
- pthread_mutex_unlock(&thread_lock);
+ cond_signal(&thread_parent);
+ cond_wait(&thread_worker, &thread_lock);
+ mutex_unlock(&thread_lock);
while (1) {
if (!params.pi) {
@@ -123,22 +124,33 @@ static void *workerfn(void *arg __maybe_unused)
static void block_threads(pthread_t *w,
pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
+ int nrcpus = perf_cpu_map__nr(cpu);
+ size_t size;
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
- CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
static void toggle_done(int sig __maybe_unused,
@@ -175,7 +187,7 @@ int bench_futex_requeue(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
@@ -198,9 +210,9 @@ int bench_futex_requeue(int argc, const char **argv)
init_stats(&requeued_stats);
init_stats(&requeuetime_stats);
pthread_attr_init(&thread_attr);
- pthread_mutex_init(&thread_lock, NULL);
- pthread_cond_init(&thread_parent, NULL);
- pthread_cond_init(&thread_worker, NULL);
+ mutex_init(&thread_lock);
+ cond_init(&thread_parent);
+ cond_init(&thread_worker);
for (j = 0; j < bench_repeat && !done; j++) {
unsigned int nrequeued = 0, wakeups = 0;
@@ -210,11 +222,11 @@ int bench_futex_requeue(int argc, const char **argv)
block_threads(worker, thread_attr, cpu);
/* make sure all threads are already blocked */
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
while (threads_starting)
- pthread_cond_wait(&thread_parent, &thread_lock);
- pthread_cond_broadcast(&thread_worker);
- pthread_mutex_unlock(&thread_lock);
+ cond_wait(&thread_parent, &thread_lock);
+ cond_broadcast(&thread_worker);
+ mutex_unlock(&thread_lock);
usleep(100000);
@@ -286,9 +298,9 @@ int bench_futex_requeue(int argc, const char **argv)
}
/* cleanup & report results */
- pthread_cond_destroy(&thread_parent);
- pthread_cond_destroy(&thread_worker);
- pthread_mutex_destroy(&thread_lock);
+ cond_destroy(&thread_parent);
+ cond_destroy(&thread_worker);
+ mutex_destroy(&thread_lock);
pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 0983f40b4b40..6682e49d0ee0 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -10,6 +10,7 @@
#include "bench.h"
#include <linux/compiler.h>
#include "../util/debug.h"
+#include "../util/mutex.h"
#ifndef HAVE_PTHREAD_BARRIER
int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
@@ -49,8 +50,8 @@ static u_int32_t futex = 0;
static pthread_t *blocked_worker;
static bool done = false;
-static pthread_mutex_t thread_lock;
-static pthread_cond_t thread_parent, thread_worker;
+static struct mutex thread_lock;
+static struct cond thread_parent, thread_worker;
static pthread_barrier_t barrier;
static struct stats waketime_stats, wakeup_stats;
static unsigned int threads_starting;
@@ -125,12 +126,12 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
static void *blocked_workerfn(void *arg __maybe_unused)
{
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
- pthread_cond_signal(&thread_parent);
- pthread_cond_wait(&thread_worker, &thread_lock);
- pthread_mutex_unlock(&thread_lock);
+ cond_signal(&thread_parent);
+ cond_wait(&thread_worker, &thread_lock);
+ mutex_unlock(&thread_lock);
while (1) { /* handle spurious wakeups */
if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
@@ -144,22 +145,33 @@ static void *blocked_workerfn(void *arg __maybe_unused)
static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
+ int nrcpus = perf_cpu_map__nr(cpu);
+ size_t size;
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
- CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+ if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
static void print_run(struct thread_data *waking_worker, unsigned int run_num)
@@ -252,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
/* some sanity checks */
if (params.nwakes > params.nthreads ||
@@ -283,9 +295,9 @@ int bench_futex_wake_parallel(int argc, const char **argv)
init_stats(&waketime_stats);
pthread_attr_init(&thread_attr);
- pthread_mutex_init(&thread_lock, NULL);
- pthread_cond_init(&thread_parent, NULL);
- pthread_cond_init(&thread_worker, NULL);
+ mutex_init(&thread_lock);
+ cond_init(&thread_parent);
+ cond_init(&thread_worker);
for (j = 0; j < bench_repeat && !done; j++) {
waking_worker = calloc(params.nwakes, sizeof(*waking_worker));
@@ -296,11 +308,11 @@ int bench_futex_wake_parallel(int argc, const char **argv)
block_threads(blocked_worker, thread_attr, cpu);
/* make sure all threads are already blocked */
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
while (threads_starting)
- pthread_cond_wait(&thread_parent, &thread_lock);
- pthread_cond_broadcast(&thread_worker);
- pthread_mutex_unlock(&thread_lock);
+ cond_wait(&thread_parent, &thread_lock);
+ cond_broadcast(&thread_worker);
+ mutex_unlock(&thread_lock);
usleep(100000);
@@ -321,9 +333,9 @@ int bench_futex_wake_parallel(int argc, const char **argv)
}
/* cleanup & report results */
- pthread_cond_destroy(&thread_parent);
- pthread_cond_destroy(&thread_worker);
- pthread_mutex_destroy(&thread_lock);
+ cond_destroy(&thread_parent);
+ cond_destroy(&thread_worker);
+ mutex_destroy(&thread_lock);
pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 2226a475e782..9ecab6620a87 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -14,6 +14,7 @@
#include <pthread.h>
#include <signal.h>
+#include "../util/mutex.h"
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include <linux/compiler.h>
@@ -34,8 +35,8 @@ static u_int32_t futex1 = 0;
static pthread_t *worker;
static bool done = false;
-static pthread_mutex_t thread_lock;
-static pthread_cond_t thread_parent, thread_worker;
+static struct mutex thread_lock;
+static struct cond thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
static unsigned int threads_starting;
static int futex_flag = 0;
@@ -65,12 +66,12 @@ static const char * const bench_futex_wake_usage[] = {
static void *workerfn(void *arg __maybe_unused)
{
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
- pthread_cond_signal(&thread_parent);
- pthread_cond_wait(&thread_worker, &thread_lock);
- pthread_mutex_unlock(&thread_lock);
+ cond_signal(&thread_parent);
+ cond_wait(&thread_worker, &thread_lock);
+ mutex_unlock(&thread_lock);
while (1) {
if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
@@ -97,22 +98,32 @@ static void print_summary(void)
static void block_threads(pthread_t *w,
pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
{
- cpu_set_t cpuset;
+ cpu_set_t *cpuset;
unsigned int i;
-
+ size_t size;
+ int nrcpus = perf_cpu_map__nr(cpu);
threads_starting = params.nthreads;
+ cpuset = CPU_ALLOC(nrcpus);
+ BUG_ON(!cpuset);
+ size = CPU_ALLOC_SIZE(nrcpus);
+
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
- CPU_ZERO(&cpuset);
- CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+ }
- if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
+ CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
+ }
}
+ CPU_FREE(cpuset);
}
static void toggle_done(int sig __maybe_unused,
@@ -151,7 +162,7 @@ int bench_futex_wake(int argc, const char **argv)
}
if (!params.nthreads)
- params.nthreads = cpu->nr;
+ params.nthreads = perf_cpu_map__nr(cpu);
worker = calloc(params.nthreads, sizeof(*worker));
if (!worker)
@@ -168,9 +179,9 @@ int bench_futex_wake(int argc, const char **argv)
init_stats(&wakeup_stats);
init_stats(&waketime_stats);
pthread_attr_init(&thread_attr);
- pthread_mutex_init(&thread_lock, NULL);
- pthread_cond_init(&thread_parent, NULL);
- pthread_cond_init(&thread_worker, NULL);
+ mutex_init(&thread_lock);
+ cond_init(&thread_parent);
+ cond_init(&thread_worker);
for (j = 0; j < bench_repeat && !done; j++) {
unsigned int nwoken = 0;
@@ -180,11 +191,11 @@ int bench_futex_wake(int argc, const char **argv)
block_threads(worker, thread_attr, cpu);
/* make sure all threads are already blocked */
- pthread_mutex_lock(&thread_lock);
+ mutex_lock(&thread_lock);
while (threads_starting)
- pthread_cond_wait(&thread_parent, &thread_lock);
- pthread_cond_broadcast(&thread_worker);
- pthread_mutex_unlock(&thread_lock);
+ cond_wait(&thread_parent, &thread_lock);
+ cond_broadcast(&thread_worker);
+ mutex_unlock(&thread_lock);
usleep(100000);
@@ -214,9 +225,9 @@ int bench_futex_wake(int argc, const char **argv)
}
/* cleanup & report results */
- pthread_cond_destroy(&thread_parent);
- pthread_cond_destroy(&thread_worker);
- pthread_mutex_destroy(&thread_lock);
+ cond_destroy(&thread_parent);
+ cond_destroy(&thread_worker);
+ mutex_destroy(&thread_lock);
pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f2640179ada9..e78dedf9e682 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -6,8 +6,6 @@
*/
#include <inttypes.h>
-/* For the CLR_() macros */
-#include <pthread.h>
#include <subcmd/parse-options.h>
#include "../util/cloexec.h"
@@ -34,6 +32,8 @@
#include <linux/numa.h>
#include <linux/zalloc.h>
+#include "../util/header.h"
+#include "../util/mutex.h"
#include <numa.h>
#include <numaif.h>
@@ -54,7 +54,7 @@
struct thread_data {
int curr_cpu;
- cpu_set_t bind_cpumask;
+ cpu_set_t *bind_cpumask;
int bind_node;
u8 *process_data;
int process_nr;
@@ -66,7 +66,7 @@ struct thread_data {
u64 system_time_ns;
u64 user_time_ns;
double speed_gbs;
- pthread_mutex_t *process_lock;
+ struct mutex *process_lock;
};
/* Parameters set by options: */
@@ -136,16 +136,16 @@ struct params {
struct global_info {
u8 *data;
- pthread_mutex_t startup_mutex;
- pthread_cond_t startup_cond;
+ struct mutex startup_mutex;
+ struct cond startup_cond;
int nr_tasks_started;
- pthread_mutex_t start_work_mutex;
- pthread_cond_t start_work_cond;
+ struct mutex start_work_mutex;
+ struct cond start_work_cond;
int nr_tasks_working;
bool start_work;
- pthread_mutex_t stop_work_mutex;
+ struct mutex stop_work_mutex;
u64 bytes_done;
struct thread_data *threads;
@@ -266,71 +266,117 @@ static bool node_has_cpus(int node)
return ret;
}
-static cpu_set_t bind_to_cpu(int target_cpu)
+static cpu_set_t *bind_to_cpu(int target_cpu)
{
- cpu_set_t orig_mask, mask;
- int ret;
+ int nrcpus = numa_num_possible_cpus();
+ cpu_set_t *orig_mask, *mask;
+ size_t size;
- ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
- BUG_ON(ret);
+ orig_mask = CPU_ALLOC(nrcpus);
+ BUG_ON(!orig_mask);
+ size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(size, orig_mask);
+
+ if (sched_getaffinity(0, size, orig_mask))
+ goto err_out;
- CPU_ZERO(&mask);
+ mask = CPU_ALLOC(nrcpus);
+ if (!mask)
+ goto err_out;
+
+ CPU_ZERO_S(size, mask);
if (target_cpu == -1) {
int cpu;
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
} else {
- BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
- CPU_SET(target_cpu, &mask);
+ if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
+ goto err;
+
+ CPU_SET_S(target_cpu, size, mask);
}
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ if (sched_setaffinity(0, size, mask))
+ goto err;
return orig_mask;
+
+err:
+ CPU_FREE(mask);
+err_out:
+ CPU_FREE(orig_mask);
+
+ /* BUG_ON due to failure in allocation of orig_mask/mask */
+ BUG_ON(-1);
+ return NULL;
}
-static cpu_set_t bind_to_node(int target_node)
+static cpu_set_t *bind_to_node(int target_node)
{
- cpu_set_t orig_mask, mask;
+ int nrcpus = numa_num_possible_cpus();
+ size_t size;
+ cpu_set_t *orig_mask, *mask;
int cpu;
- int ret;
- ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
- BUG_ON(ret);
+ orig_mask = CPU_ALLOC(nrcpus);
+ BUG_ON(!orig_mask);
+ size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(size, orig_mask);
+
+ if (sched_getaffinity(0, size, orig_mask))
+ goto err_out;
+
+ mask = CPU_ALLOC(nrcpus);
+ if (!mask)
+ goto err_out;
- CPU_ZERO(&mask);
+ CPU_ZERO_S(size, mask);
if (target_node == NUMA_NO_NODE) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
} else {
struct bitmask *cpumask = numa_allocate_cpumask();
- BUG_ON(!cpumask);
+ if (!cpumask)
+ goto err;
+
if (!numa_node_to_cpus(target_node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu))
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
}
}
numa_free_cpumask(cpumask);
}
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ if (sched_setaffinity(0, size, mask))
+ goto err;
return orig_mask;
+
+err:
+ CPU_FREE(mask);
+err_out:
+ CPU_FREE(orig_mask);
+
+ /* BUG_ON due to failure in allocation of orig_mask/mask */
+ BUG_ON(-1);
+ return NULL;
}
-static void bind_to_cpumask(cpu_set_t mask)
+static void bind_to_cpumask(cpu_set_t *mask)
{
int ret;
+ size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ ret = sched_setaffinity(0, size, mask);
+ if (ret) {
+ CPU_FREE(mask);
+ BUG_ON(ret);
+ }
}
static void mempol_restore(void)
@@ -376,7 +422,7 @@ do { \
static u8 *alloc_data(ssize_t bytes0, int map_flags,
int init_zero, int init_cpu0, int thp, int init_random)
{
- cpu_set_t orig_mask;
+ cpu_set_t *orig_mask = NULL;
ssize_t bytes;
u8 *buf;
int ret;
@@ -434,6 +480,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
/* Restore affinity: */
if (init_cpu0) {
bind_to_cpumask(orig_mask);
+ CPU_FREE(orig_mask);
mempol_restore();
}
@@ -476,30 +523,6 @@ static void * setup_private_data(ssize_t bytes)
return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
}
-/*
- * Return a process-shared (global) mutex:
- */
-static void init_global_mutex(pthread_mutex_t *mutex)
-{
- pthread_mutexattr_t attr;
-
- pthread_mutexattr_init(&attr);
- pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
- pthread_mutex_init(mutex, &attr);
-}
-
-/*
- * Return a process-shared (global) condition variable:
- */
-static void init_global_cond(pthread_cond_t *cond)
-{
- pthread_condattr_t attr;
-
- pthread_condattr_init(&attr);
- pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
- pthread_cond_init(cond, &attr);
-}
-
static int parse_cpu_list(const char *arg)
{
p0.cpu_list_str = strdup(arg);
@@ -585,10 +608,16 @@ static int parse_setup_cpu_list(void)
return -1;
}
+ if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
+ printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
+ return -1;
+ }
+
BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
BUG_ON(bind_cpu_0 > bind_cpu_1);
for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+ size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int i;
for (i = 0; i < mul; i++) {
@@ -608,10 +637,15 @@ static int parse_setup_cpu_list(void)
tprintf("%2d", bind_cpu);
}
- CPU_ZERO(&td->bind_cpumask);
+ td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+ BUG_ON(!td->bind_cpumask);
+ CPU_ZERO_S(size, td->bind_cpumask);
for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
- BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
- CPU_SET(cpu, &td->bind_cpumask);
+ if (cpu < 0 || cpu >= g->p.nr_cpus) {
+ CPU_FREE(td->bind_cpumask);
+ BUG_ON(-1);
+ }
+ CPU_SET_S(cpu, size, td->bind_cpumask);
}
t++;
}
@@ -752,8 +786,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return parse_node_list(arg);
}
-#define BIT(x) (1ul << x)
-
static inline uint32_t lfsr_32(uint32_t lfsr)
{
const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
@@ -1163,22 +1195,22 @@ static void *worker_thread(void *__tdata)
}
if (g->p.serialize_startup) {
- pthread_mutex_lock(&g->startup_mutex);
+ mutex_lock(&g->startup_mutex);
g->nr_tasks_started++;
/* The last thread wakes the main process. */
if (g->nr_tasks_started == g->p.nr_tasks)
- pthread_cond_signal(&g->startup_cond);
+ cond_signal(&g->startup_cond);
- pthread_mutex_unlock(&g->startup_mutex);
+ mutex_unlock(&g->startup_mutex);
/* Here we will wait for the main process to start us all at once: */
- pthread_mutex_lock(&g->start_work_mutex);
+ mutex_lock(&g->start_work_mutex);
g->start_work = false;
g->nr_tasks_working++;
while (!g->start_work)
- pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
+ cond_wait(&g->start_work_cond, &g->start_work_mutex);
- pthread_mutex_unlock(&g->start_work_mutex);
+ mutex_unlock(&g->start_work_mutex);
}
gettimeofday(&start0, NULL);
@@ -1197,17 +1229,17 @@ static void *worker_thread(void *__tdata)
val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);
if (g->p.sleep_usecs) {
- pthread_mutex_lock(td->process_lock);
+ mutex_lock(td->process_lock);
usleep(g->p.sleep_usecs);
- pthread_mutex_unlock(td->process_lock);
+ mutex_unlock(td->process_lock);
}
/*
* Amount of work to be done under a process-global lock:
*/
if (g->p.bytes_process_locked) {
- pthread_mutex_lock(td->process_lock);
+ mutex_lock(td->process_lock);
val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
- pthread_mutex_unlock(td->process_lock);
+ mutex_unlock(td->process_lock);
}
work_done = g->p.bytes_global + g->p.bytes_process +
@@ -1241,7 +1273,7 @@ static void *worker_thread(void *__tdata)
* by migrating to CPU#0:
*/
if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
- cpu_set_t orig_mask;
+ cpu_set_t *orig_mask;
int target_cpu;
int this_cpu;
@@ -1265,6 +1297,7 @@ static void *worker_thread(void *__tdata)
printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
bind_to_cpumask(orig_mask);
+ CPU_FREE(orig_mask);
}
if (details >= 3) {
@@ -1303,9 +1336,9 @@ static void *worker_thread(void *__tdata)
free_data(thread_data, g->p.bytes_thread);
- pthread_mutex_lock(&g->stop_work_mutex);
+ mutex_lock(&g->stop_work_mutex);
g->bytes_done += bytes_done;
- pthread_mutex_unlock(&g->stop_work_mutex);
+ mutex_unlock(&g->stop_work_mutex);
return NULL;
}
@@ -1315,7 +1348,7 @@ static void *worker_thread(void *__tdata)
*/
static void worker_process(int process_nr)
{
- pthread_mutex_t process_lock;
+ struct mutex process_lock;
struct thread_data *td;
pthread_t *pthreads;
u8 *process_data;
@@ -1323,7 +1356,7 @@ static void worker_process(int process_nr)
int ret;
int t;
- pthread_mutex_init(&process_lock, NULL);
+ mutex_init(&process_lock);
set_taskname("process %d", process_nr);
/*
@@ -1398,21 +1431,31 @@ static void init_thread_data(void)
for (t = 0; t < g->p.nr_tasks; t++) {
struct thread_data *td = g->threads + t;
+ size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int cpu;
/* Allow all nodes by default: */
td->bind_node = NUMA_NO_NODE;
/* Allow all CPUs by default: */
- CPU_ZERO(&td->bind_cpumask);
+ td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+ BUG_ON(!td->bind_cpumask);
+ CPU_ZERO_S(cpuset_size, td->bind_cpumask);
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &td->bind_cpumask);
+ CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
}
}
static void deinit_thread_data(void)
{
ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+ int t;
+
+ /* Free the bind_cpumask allocated for thread_data */
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ CPU_FREE(td->bind_cpumask);
+ }
free_data(g->threads, size);
}
@@ -1472,11 +1515,11 @@ static int init(void)
g->data = setup_shared_data(g->p.bytes_global);
/* Startup serialization: */
- init_global_mutex(&g->start_work_mutex);
- init_global_cond(&g->start_work_cond);
- init_global_mutex(&g->startup_mutex);
- init_global_cond(&g->startup_cond);
- init_global_mutex(&g->stop_work_mutex);
+ mutex_init_pshared(&g->start_work_mutex);
+ cond_init_pshared(&g->start_work_cond);
+ mutex_init_pshared(&g->startup_mutex);
+ cond_init_pshared(&g->startup_cond);
+ mutex_init_pshared(&g->stop_work_mutex);
init_thread_data();
@@ -1565,17 +1608,17 @@ static int __bench_numa(const char *name)
* Wait for all the threads to start up. The last thread will
* signal this process.
*/
- pthread_mutex_lock(&g->startup_mutex);
+ mutex_lock(&g->startup_mutex);
while (g->nr_tasks_started != g->p.nr_tasks)
- pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
+ cond_wait(&g->startup_cond, &g->startup_mutex);
- pthread_mutex_unlock(&g->startup_mutex);
+ mutex_unlock(&g->startup_mutex);
/* Wait for all threads to be at the start_work_cond. */
while (!threads_ready) {
- pthread_mutex_lock(&g->start_work_mutex);
+ mutex_lock(&g->start_work_mutex);
threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
- pthread_mutex_unlock(&g->start_work_mutex);
+ mutex_unlock(&g->start_work_mutex);
if (!threads_ready)
usleep(1);
}
@@ -1593,10 +1636,10 @@ static int __bench_numa(const char *name)
start = stop;
/* Start all threads running. */
- pthread_mutex_lock(&g->start_work_mutex);
+ mutex_lock(&g->start_work_mutex);
g->start_work = true;
- pthread_mutex_unlock(&g->start_work_mutex);
- pthread_cond_broadcast(&g->start_work_cond);
+ mutex_unlock(&g->start_work_mutex);
+ cond_broadcast(&g->start_work_cond);
} else {
gettimeofday(&start, NULL);
}
@@ -1672,7 +1715,7 @@ static int __bench_numa(const char *name)
"GB/sec,", "total-speed", "GB/sec total speed");
if (g->p.show_details >= 2) {
- char tname[14 + 2 * 10 + 1];
+ char tname[14 + 2 * 11 + 1];
struct thread_data *td;
for (p = 0; p < g->p.nr_proc; p++) {
for (t = 0; t < g->p.nr_threads; t++) {