aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-record.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-05-18 10:24:43 +0200
committerIngo Molnar <mingo@kernel.org>2019-05-18 10:24:43 +0200
commit62e1c09418fc16d27720b128275cac61367e2c1b (patch)
tree4759aa6662b1398e2b93696ace58f6f309722b06 /tools/perf/builtin-record.c
parentMerge tag 'media/v5.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media (diff)
parentperf stat: Support 'percore' event qualifier (diff)
downloadlinux-dev-62e1c09418fc16d27720b128275cac61367e2c1b.tar.xz
linux-dev-62e1c09418fc16d27720b128275cac61367e2c1b.zip
Merge tag 'perf-core-for-mingo-5.2-20190517' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: perf.data: Alexey Budankov: - Streaming compression of perf ring buffer into PERF_RECORD_COMPRESSED user space records, resulting in ~3-5x perf.data file size reduction on variety of tested workloads what saves storage space on larger server systems where perf.data size can easily reach several tens or even hundreds of GiBs, especially when profiling with DWARF-based stacks and tracing of context switches. perf record: Arnaldo Carvalho de Melo - Improve -user-regs/intr-regs suggestions to overcome errors. perf annotate: Jin Yao: - Remove hist__account_cycles() from callback, speeding up branch processing (perf record -b). perf stat: - Add a 'percore' event qualifier, e.g.: -e cpu/event=0,umask=0x3,percore=1/, that sums up the event counts for both hardware threads in a core. We can already do this with --per-core, but it's often useful to do this together with other metrics that are collected per hardware thread. I.e. now its possible to do this per-event, and have it mixed with other events not aggregated by core. core libraries: Donald Yandt: - Check for errors when doing fgets(/proc/version). Jiri Olsa: - Speed up report for perf compiled with linbunwind. tools headers: Arnaldo Carvalho de Melo - Update memcpy_64.S, x86's kvm.h and pt_regs.h. arm64: Florian Fainelli: - Map Brahma-B53 CPUID to cortex-a53 events. - Add Cortex-A57 and Cortex-A72 events. csky: Mao Han: - Add DWARF register mappings for libdw, allowing --call-graph=dwarf to work on the C-SKY arch. x86: Andi Kleen/Kan Liang: - Add support for recording and printing XMM registers, available, for instance, on Icelake. Kan Liang: - Add uncore_upi (Intel's "Ultra Path Interconnect" events) JSON support. UPI replaced the Intel QuickPath Interconnect (QPI) in Xeon Skylake-SP. Intel PT: Adrian Hunter . Fix instructions sampling rate. . Timestamp fixes. . Improve exported-sql-viewer GUI, allowing, for instance, to copy'n'paste the trees, useful for e-mailing. Documentation: Thomas Richter: - Add description for 'perf --debug stderr=1', which redirects stderr to stdout. libtraceevent: Tzvetomir Stoyanov: - Add man pages for the various APIs. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r--tools/perf/builtin-record.c229
1 files changed, 197 insertions, 32 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c5e10552776a..e2c3a585a61e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -133,6 +133,11 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
return 0;
}
+static int record__aio_enabled(struct record *rec);
+static int record__comp_enabled(struct record *rec);
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+ void *src, size_t src_size);
+
#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
void *buf, size_t size, off_t off)
@@ -183,9 +188,9 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
if (rem_size == 0) {
cblock->aio_fildes = -1;
/*
- * md->refcount is incremented in perf_mmap__push() for
- * every enqueued aio write request so decrement it because
- * the request is now complete.
+ * md->refcount is incremented in record__aio_pushfn() for
+ * every aio write request started in record__aio_push() so
+ * decrement it because the request is now complete.
*/
perf_mmap__put(md);
rc = 1;
@@ -240,18 +245,89 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all)
} while (1);
}
-static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
+struct record_aio {
+ struct record *rec;
+ void *data;
+ size_t size;
+};
+
+static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
{
- struct record *rec = to;
- int ret, trace_fd = rec->session->data->file.fd;
+ struct record_aio *aio = to;
- rec->samples++;
+ /*
+ * map->base data pointed by buf is copied into free map->aio.data[] buffer
+ * to release space in the kernel buffer as fast as possible, calling
+ * perf_mmap__consume() from perf_mmap__push() function.
+ *
+ * That lets the kernel to proceed with storing more profiling data into
+ * the kernel buffer earlier than other per-cpu kernel buffers are handled.
+ *
+ * Coping can be done in two steps in case the chunk of profiling data
+ * crosses the upper bound of the kernel buffer. In this case we first move
+ * part of data from map->start till the upper bound and then the reminder
+ * from the beginning of the kernel buffer till the end of the data chunk.
+ */
+
+ if (record__comp_enabled(aio->rec)) {
+ size = zstd_compress(aio->rec->session, aio->data + aio->size,
+ perf_mmap__mmap_len(map) - aio->size,
+ buf, size);
+ } else {
+ memcpy(aio->data + aio->size, buf, size);
+ }
+
+ if (!aio->size) {
+ /*
+ * Increment map->refcount to guard map->aio.data[] buffer
+ * from premature deallocation because map object can be
+ * released earlier than aio write request started on
+ * map->aio.data[] buffer is complete.
+ *
+ * perf_mmap__put() is done at record__aio_complete()
+ * after started aio request completion or at record__aio_push()
+ * if the request failed to start.
+ */
+ perf_mmap__get(map);
+ }
+
+ aio->size += size;
+
+ return size;
+}
- ret = record__aio_write(cblock, trace_fd, bf, size, off);
+static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
+{
+ int ret, idx;
+ int trace_fd = rec->session->data->file.fd;
+ struct record_aio aio = { .rec = rec, .size = 0 };
+
+ /*
+ * Call record__aio_sync() to wait till map->aio.data[] buffer
+ * becomes available after previous aio write operation.
+ */
+
+ idx = record__aio_sync(map, false);
+ aio.data = map->aio.data[idx];
+ ret = perf_mmap__push(map, &aio, record__aio_pushfn);
+ if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
+ return ret;
+
+ rec->samples++;
+ ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
if (!ret) {
- rec->bytes_written += size;
+ *off += aio.size;
+ rec->bytes_written += aio.size;
if (switch_output_size(rec))
trigger_hit(&switch_output_trigger);
+ } else {
+ /*
+ * Decrement map->refcount incremented in record__aio_pushfn()
+ * back if record__aio_write() operation failed to start, otherwise
+ * map->refcount is decremented in record__aio_complete() after
+ * aio write operation finishes successfully.
+ */
+ perf_mmap__put(map);
}
return ret;
@@ -273,7 +349,7 @@ static void record__aio_mmap_read_sync(struct record *rec)
struct perf_evlist *evlist = rec->evlist;
struct perf_mmap *maps = evlist->mmap;
- if (!rec->opts.nr_cblocks)
+ if (!record__aio_enabled(rec))
return;
for (i = 0; i < evlist->nr_mmaps; i++) {
@@ -307,13 +383,8 @@ static int record__aio_parse(const struct option *opt,
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;
-static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
-{
- return -1;
-}
-
-static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
- void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
+static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
+ off_t *off __maybe_unused)
{
return -1;
}
@@ -372,6 +443,32 @@ static int record__mmap_flush_parse(const struct option *opt,
return 0;
}
+#ifdef HAVE_ZSTD_SUPPORT
+static unsigned int comp_level_default = 1;
+
+static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
+{
+ struct record_opts *opts = opt->value;
+
+ if (unset) {
+ opts->comp_level = 0;
+ } else {
+ if (str)
+ opts->comp_level = strtol(str, NULL, 0);
+ if (!opts->comp_level)
+ opts->comp_level = comp_level_default;
+ }
+
+ return 0;
+}
+#endif
+static unsigned int comp_level_max = 22;
+
+static int record__comp_enabled(struct record *rec)
+{
+ return rec->opts.comp_level > 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -385,6 +482,11 @@ static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size
{
struct record *rec = to;
+ if (record__comp_enabled(rec)) {
+ size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
+ bf = map->data;
+ }
+
rec->samples++;
return record__write(rec, map, bf, size);
}
@@ -582,7 +684,7 @@ static int record__mmap_evlist(struct record *rec,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode,
opts->nr_cblocks, opts->affinity,
- opts->mmap_flush) < 0) {
+ opts->mmap_flush, opts->comp_level) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -771,6 +873,37 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
}
}
+static size_t process_comp_header(void *record, size_t increment)
+{
+ struct compressed_event *event = record;
+ size_t size = sizeof(*event);
+
+ if (increment) {
+ event->header.size += increment;
+ return increment;
+ }
+
+ event->header.type = PERF_RECORD_COMPRESSED;
+ event->header.size = size;
+
+ return size;
+}
+
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+ void *src, size_t src_size)
+{
+ size_t compressed;
+ size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1;
+
+ compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
+ max_record_size, process_comp_header);
+
+ session->bytes_transferred += src_size;
+ session->bytes_compressed += compressed;
+
+ return compressed;
+}
+
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
bool overwrite, bool synch)
{
@@ -779,7 +912,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
int rc = 0;
struct perf_mmap *maps;
int trace_fd = rec->data.file.fd;
- off_t off;
+ off_t off = 0;
if (!evlist)
return 0;
@@ -805,20 +938,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
map->flush = 1;
}
if (!record__aio_enabled(rec)) {
- if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(map, rec, record__pushfn) < 0) {
if (synch)
map->flush = flush;
rc = -1;
goto out;
}
} else {
- int idx;
- /*
- * Call record__aio_sync() to wait till map->data buffer
- * becomes available after previous aio write request.
- */
- idx = record__aio_sync(map, false);
- if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
+ if (record__aio_push(rec, map, &off) < 0) {
record__aio_set_pos(trace_fd, off);
if (synch)
map->flush = flush;
@@ -888,6 +1015,8 @@ static void record__init_features(struct record *rec)
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
+ if (!record__comp_enabled(rec))
+ perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@@ -1186,6 +1315,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
bool disabled = false, draining = false;
struct perf_evlist *sb_evlist = NULL;
int fd;
+ float ratio = 0;
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
@@ -1215,6 +1345,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
fd = perf_data__fd(data);
rec->session = session;
+ if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
+ pr_err("Compression initialization failed.\n");
+ return -1;
+ }
+
+ session->header.env.comp_type = PERF_COMP_ZSTD;
+ session->header.env.comp_level = rec->opts.comp_level;
+
record__init_features(rec);
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
@@ -1244,6 +1382,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
err = -1;
goto out_child;
}
+ session->header.env.comp_mmap_len = session->evlist->mmap_len;
err = bpf__apply_obj_config();
if (err) {
@@ -1491,6 +1630,11 @@ out_child:
record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
+ if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
+ ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
+ session->header.env.comp_ratio = ratio + 0.5;
+ }
+
if (forks) {
int exit_status;
@@ -1537,12 +1681,19 @@ out_child:
else
samples[0] = '\0';
- fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
+ fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
perf_data__size(data) / 1024.0 / 1024.0,
data->path, postfix, samples);
+ if (ratio) {
+ fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
+ rec->session->bytes_transferred / 1024.0 / 1024.0,
+ ratio);
+ }
+ fprintf(stderr, " ]\n");
}
out_delete_session:
+ zstd_fini(&session->zstd_data);
perf_session__delete(session);
if (!opts->no_bpf_event)
@@ -2017,10 +2168,10 @@ static struct option __record_options[] = {
"use per-thread mmaps"),
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
"sample selected machine registers on interrupt,"
- " use -I ? to list register names", parse_regs),
+ " use '-I?' to list register names", parse_intr_regs),
OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
"sample selected machine registers on interrupt,"
- " use -I ? to list register names", parse_regs),
+ " use '--user-regs=?' to list register names", parse_user_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts,
@@ -2068,6 +2219,11 @@ static struct option __record_options[] = {
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity),
+#ifdef HAVE_ZSTD_SUPPORT
+ OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
+ "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
+ record__parse_comp_level),
+#endif
OPT_END()
};
@@ -2127,6 +2283,12 @@ int cmd_record(int argc, const char **argv)
"cgroup monitoring only available in system-wide mode");
}
+
+ if (rec->opts.comp_level != 0) {
+ pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
+ rec->no_buildid = true;
+ }
+
if (rec->opts.record_switch_events &&
!perf_can_record_switch_events()) {
ui__error("kernel does not support recording context switch events\n");
@@ -2272,12 +2434,15 @@ int cmd_record(int argc, const char **argv)
if (rec->opts.nr_cblocks > nr_cblocks_max)
rec->opts.nr_cblocks = nr_cblocks_max;
- if (verbose > 0)
- pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
+ pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
+ if (rec->opts.comp_level > comp_level_max)
+ rec->opts.comp_level = comp_level_max;
+ pr_debug("comp level: %d\n", rec->opts.comp_level);
+
err = __cmd_record(&record, argc, argv);
out:
perf_evlist__delete(rec->evlist);