diff options
Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r-- | tools/perf/builtin-record.c | 346 |
1 files changed, 303 insertions, 43 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f3f7f3100336..e2c3a585a61e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -62,6 +62,9 @@ struct switch_output { unsigned long time; const char *str; bool set; + char **filenames; + int num_files; + int cur_file; }; struct record { @@ -130,6 +133,11 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse return 0; } +static int record__aio_enabled(struct record *rec); +static int record__comp_enabled(struct record *rec); +static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, + void *src, size_t src_size); + #ifdef HAVE_AIO_SUPPORT static int record__aio_write(struct aiocb *cblock, int trace_fd, void *buf, size_t size, off_t off) @@ -180,9 +188,9 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) if (rem_size == 0) { cblock->aio_fildes = -1; /* - * md->refcount is incremented in perf_mmap__push() for - * every enqueued aio write request so decrement it because - * the request is now complete. + * md->refcount is incremented in record__aio_pushfn() for + * every aio write request started in record__aio_push() so + * decrement it because the request is now complete. */ perf_mmap__put(md); rc = 1; @@ -237,18 +245,89 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all) } while (1); } -static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off) +struct record_aio { + struct record *rec; + void *data; + size_t size; +}; + +static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size) { - struct record *rec = to; - int ret, trace_fd = rec->session->data->file.fd; + struct record_aio *aio = to; - rec->samples++; + /* + * map->base data pointed by buf is copied into free map->aio.data[] buffer + * to release space in the kernel buffer as fast as possible, calling + * perf_mmap__consume() from perf_mmap__push() function. + * + * That lets the kernel to proceed with storing more profiling data into + * the kernel buffer earlier than other per-cpu kernel buffers are handled. + * + * Coping can be done in two steps in case the chunk of profiling data + * crosses the upper bound of the kernel buffer. In this case we first move + * part of data from map->start till the upper bound and then the reminder + * from the beginning of the kernel buffer till the end of the data chunk. + */ + + if (record__comp_enabled(aio->rec)) { + size = zstd_compress(aio->rec->session, aio->data + aio->size, + perf_mmap__mmap_len(map) - aio->size, + buf, size); + } else { + memcpy(aio->data + aio->size, buf, size); + } + + if (!aio->size) { + /* + * Increment map->refcount to guard map->aio.data[] buffer + * from premature deallocation because map object can be + * released earlier than aio write request started on + * map->aio.data[] buffer is complete. + * + * perf_mmap__put() is done at record__aio_complete() + * after started aio request completion or at record__aio_push() + * if the request failed to start. + */ + perf_mmap__get(map); + } + + aio->size += size; + + return size; +} + +static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off) +{ + int ret, idx; + int trace_fd = rec->session->data->file.fd; + struct record_aio aio = { .rec = rec, .size = 0 }; + + /* + * Call record__aio_sync() to wait till map->aio.data[] buffer + * becomes available after previous aio write operation. + */ + + idx = record__aio_sync(map, false); + aio.data = map->aio.data[idx]; + ret = perf_mmap__push(map, &aio, record__aio_pushfn); + if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ + return ret; - ret = record__aio_write(cblock, trace_fd, bf, size, off); + rec->samples++; + ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); if (!ret) { - rec->bytes_written += size; + *off += aio.size; + rec->bytes_written += aio.size; if (switch_output_size(rec)) trigger_hit(&switch_output_trigger); + } else { + /* + * Decrement map->refcount incremented in record__aio_pushfn() + * back if record__aio_write() operation failed to start, otherwise + * map->refcount is decremented in record__aio_complete() after + * aio write operation finishes successfully. + */ + perf_mmap__put(map); } return ret; @@ -270,7 +349,7 @@ static void record__aio_mmap_read_sync(struct record *rec) struct perf_evlist *evlist = rec->evlist; struct perf_mmap *maps = evlist->mmap; - if (!rec->opts.nr_cblocks) + if (!record__aio_enabled(rec)) return; for (i = 0; i < evlist->nr_mmaps; i++) { @@ -304,13 +383,8 @@ static int record__aio_parse(const struct option *opt, #else /* HAVE_AIO_SUPPORT */ static int nr_cblocks_max = 0; -static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused) -{ - return -1; -} - -static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused, - void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused) +static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused, + off_t *off __maybe_unused) { return -1; } @@ -334,6 +408,67 @@ static int record__aio_enabled(struct record *rec) return rec->opts.nr_cblocks > 0; } +#define MMAP_FLUSH_DEFAULT 1 +static int record__mmap_flush_parse(const struct option *opt, + const char *str, + int unset) +{ + int flush_max; + struct record_opts *opts = (struct record_opts *)opt->value; + static struct parse_tag tags[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + + if (unset) + return 0; + + if (str) { + opts->mmap_flush = parse_tag_value(str, tags); + if (opts->mmap_flush == (int)-1) + opts->mmap_flush = strtol(str, NULL, 0); + } + + if (!opts->mmap_flush) + opts->mmap_flush = MMAP_FLUSH_DEFAULT; + + flush_max = perf_evlist__mmap_size(opts->mmap_pages); + flush_max /= 4; + if (opts->mmap_flush > flush_max) + opts->mmap_flush = flush_max; + + return 0; +} + +#ifdef HAVE_ZSTD_SUPPORT +static unsigned int comp_level_default = 1; + +static int record__parse_comp_level(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = opt->value; + + if (unset) { + opts->comp_level = 0; + } else { + if (str) + opts->comp_level = strtol(str, NULL, 0); + if (!opts->comp_level) + opts->comp_level = comp_level_default; + } + + return 0; +} +#endif +static unsigned int comp_level_max = 22; + +static int record__comp_enabled(struct record *rec) +{ + return rec->opts.comp_level > 0; +} + static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -347,6 +482,11 @@ static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size { struct record *rec = to; + if (record__comp_enabled(rec)) { + size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); + bf = map->data; + } + rec->samples++; return record__write(rec, map, bf, size); } @@ -392,7 +532,7 @@ static int record__process_auxtrace(struct perf_tool *tool, size_t padding; u8 pad[8] = {0}; - if (!perf_data__is_pipe(data)) { + if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) { off_t file_offset; int fd = perf_data__fd(data); int err; @@ -543,7 +683,8 @@ static int record__mmap_evlist(struct record *rec, if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode, - opts->nr_cblocks, opts->affinity) < 0) { + opts->nr_cblocks, opts->affinity, + opts->mmap_flush, opts->comp_level) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " @@ -732,15 +873,46 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) } } +static size_t process_comp_header(void *record, size_t increment) +{ + struct compressed_event *event = record; + size_t size = sizeof(*event); + + if (increment) { + event->header.size += increment; + return increment; + } + + event->header.type = PERF_RECORD_COMPRESSED; + event->header.size = size; + + return size; +} + +static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, + void *src, size_t src_size) +{ + size_t compressed; + size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1; + + compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size, + max_record_size, process_comp_header); + + session->bytes_transferred += src_size; + session->bytes_compressed += compressed; + + return compressed; +} + static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, - bool overwrite) + bool overwrite, bool synch) { u64 bytes_written = rec->bytes_written; int i; int rc = 0; struct perf_mmap *maps; int trace_fd = rec->data.file.fd; - off_t off; + off_t off = 0; if (!evlist) return 0; @@ -756,28 +928,33 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli off = record__aio_get_pos(trace_fd); for (i = 0; i < evlist->nr_mmaps; i++) { + u64 flush = 0; struct perf_mmap *map = &maps[i]; if (map->base) { record__adjust_affinity(rec, map); + if (synch) { + flush = map->flush; + map->flush = 1; + } if (!record__aio_enabled(rec)) { - if (perf_mmap__push(map, rec, record__pushfn) != 0) { + if (perf_mmap__push(map, rec, record__pushfn) < 0) { + if (synch) + map->flush = flush; rc = -1; goto out; } } else { - int idx; - /* - * Call record__aio_sync() to wait till map->data buffer - * becomes available after previous aio write request. - */ - idx = record__aio_sync(map, false); - if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) { + if (record__aio_push(rec, map, &off) < 0) { record__aio_set_pos(trace_fd, off); + if (synch) + map->flush = flush; rc = -1; goto out; } } + if (synch) + map->flush = flush; } if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && @@ -803,15 +980,15 @@ out: return rc; } -static int record__mmap_read_all(struct record *rec) +static int record__mmap_read_all(struct record *rec, bool synch) { int err; - err = record__mmap_read_evlist(rec, rec->evlist, false); + err = record__mmap_read_evlist(rec, rec->evlist, false, synch); if (err) return err; - return record__mmap_read_evlist(rec, rec->evlist, true); + return record__mmap_read_evlist(rec, rec->evlist, true, synch); } static void record__init_features(struct record *rec) @@ -837,6 +1014,10 @@ static void record__init_features(struct record *rec) if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) perf_header__clear_feat(&session->header, HEADER_CLOCKID); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); + if (!record__comp_enabled(rec)) + perf_header__clear_feat(&session->header, HEADER_COMPRESSED); + perf_header__clear_feat(&session->header, HEADER_STAT); } @@ -890,6 +1071,7 @@ record__switch_output(struct record *rec, bool at_exit) { struct perf_data *data = &rec->data; int fd, err; + char *new_filename; /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; @@ -910,7 +1092,7 @@ record__switch_output(struct record *rec, bool at_exit) fd = perf_data__switch(data, timestamp, rec->session->header.data_offset, - at_exit); + at_exit, &new_filename); if (fd >= 0 && !at_exit) { rec->bytes_written = 0; rec->session->header.data_size = 0; @@ -920,6 +1102,21 @@ record__switch_output(struct record *rec, bool at_exit) fprintf(stderr, "[ perf record: Dump %s.%s ]\n", data->path, timestamp); + if (rec->switch_output.num_files) { + int n = rec->switch_output.cur_file + 1; + + if (n >= rec->switch_output.num_files) + n = 0; + rec->switch_output.cur_file = n; + if (rec->switch_output.filenames[n]) { + remove(rec->switch_output.filenames[n]); + free(rec->switch_output.filenames[n]); + } + rec->switch_output.filenames[n] = new_filename; + } else { + free(new_filename); + } + /* Output tracking events */ if (!at_exit) { record__synthesize(rec, false); @@ -1093,7 +1290,7 @@ static int record__synthesize(struct record *rec, bool tail) return err; } - err = perf_event__synthesize_bpf_events(tool, process_synthesized_event, + err = perf_event__synthesize_bpf_events(session, process_synthesized_event, machine, opts); if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); @@ -1116,7 +1313,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; + struct perf_evlist *sb_evlist = NULL; int fd; + float ratio = 0; atexit(record__sig_exit); signal(SIGCHLD, sig_handler); @@ -1146,6 +1345,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) fd = perf_data__fd(data); rec->session = session; + if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { + pr_err("Compression initialization failed.\n"); + return -1; + } + + session->header.env.comp_type = PERF_COMP_ZSTD; + session->header.env.comp_level = rec->opts.comp_level; + record__init_features(rec); if (rec->opts.use_clockid && rec->opts.clockid_res_ns) @@ -1175,6 +1382,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = -1; goto out_child; } + session->header.env.comp_mmap_len = session->evlist->mmap_len; err = bpf__apply_obj_config(); if (err) { @@ -1216,6 +1424,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + if (!opts->no_bpf_event) + bpf_event__add_sb_event(&sb_evlist, &session->header.env); + + if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + opts->no_bpf_event = true; + } + err = record__synthesize(rec, false); if (err < 0) goto out_child; @@ -1310,7 +1526,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (trigger_is_hit(&switch_output_trigger) || done || draining) perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); - if (record__mmap_read_all(rec) < 0) { + if (record__mmap_read_all(rec, false) < 0) { trigger_error(&auxtrace_snapshot_trigger); trigger_error(&switch_output_trigger); err = -1; @@ -1411,8 +1627,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__synthesize_workload(rec, true); out_child: + record__mmap_read_all(rec, true); record__aio_mmap_read_sync(rec); + if (rec->session->bytes_transferred && rec->session->bytes_compressed) { + ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; + session->header.env.comp_ratio = ratio + 0.5; + } + if (forks) { int exit_status; @@ -1459,13 +1681,23 @@ out_child: else samples[0] = '\0'; - fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", + fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", perf_data__size(data) / 1024.0 / 1024.0, data->path, postfix, samples); + if (ratio) { + fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", + rec->session->bytes_transferred / 1024.0 / 1024.0, + ratio); + } + fprintf(stderr, " ]\n"); } out_delete_session: + zstd_fini(&session->zstd_data); perf_session__delete(session); + + if (!opts->no_bpf_event) + perf_evlist__stop_sb_thread(sb_evlist); return status; } @@ -1813,6 +2045,7 @@ static struct record record = { .uses_mmap = true, .default_per_cpu = true, }, + .mmap_flush = MMAP_FLUSH_DEFAULT, }, .tool = { .sample = process_sample_event, @@ -1870,7 +2103,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), - OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"), + OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", @@ -1879,6 +2112,9 @@ static struct option __record_options[] = { OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", "number of mmap data pages and AUX area tracing mmap pages", record__parse_mmap_pages), + OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", + "Minimal number of bytes that is extracted from mmap data pages (default: 1)", + record__mmap_flush_parse), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, @@ -1932,10 +2168,10 @@ static struct option __record_options[] = { "use per-thread mmaps"), OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", "sample selected machine registers on interrupt," - " use -I ? to list register names", parse_regs), + " use '-I?' to list register names", parse_intr_regs), OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", "sample selected machine registers on interrupt," - " use -I ? to list register names", parse_regs), + " use '--user-regs=?' to list register names", parse_user_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), OPT_CALLBACK('k', "clockid", &record.opts, @@ -1968,9 +2204,11 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, "Record timestamp boundary (time of first/last samples)"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, - &record.switch_output.set, "signal,size,time", - "Switch output when receive SIGUSR2 or cross size,time threshold", + &record.switch_output.set, "signal or size[BKMG] or time[smhd]", + "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"), + OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, + "Limit number of switch output generated files"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), #ifdef HAVE_AIO_SUPPORT @@ -1981,6 +2219,11 @@ static struct option __record_options[] = { OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", record__parse_affinity), +#ifdef HAVE_ZSTD_SUPPORT + OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, + "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)", + record__parse_comp_level), +#endif OPT_END() }; @@ -2040,6 +2283,12 @@ int cmd_record(int argc, const char **argv) "cgroup monitoring only available in system-wide mode"); } + + if (rec->opts.comp_level != 0) { + pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); + rec->no_buildid = true; + } + if (rec->opts.record_switch_events && !perf_can_record_switch_events()) { ui__error("kernel does not support recording context switch events\n"); @@ -2057,6 +2306,13 @@ int cmd_record(int argc, const char **argv) alarm(rec->switch_output.time); } + if (rec->switch_output.num_files) { + rec->switch_output.filenames = calloc(sizeof(char *), + rec->switch_output.num_files); + if (!rec->switch_output.filenames) + return -EINVAL; + } + /* * Allow aliases to facilitate the lookup of symbols for address * filters. Refer to auxtrace_parse_filters(). @@ -2178,10 +2434,14 @@ int cmd_record(int argc, const char **argv) if (rec->opts.nr_cblocks > nr_cblocks_max) rec->opts.nr_cblocks = nr_cblocks_max; - if (verbose > 0) - pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); + pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); + pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); + + if (rec->opts.comp_level > comp_level_max) + rec->opts.comp_level = comp_level_max; + pr_debug("comp level: %d\n", rec->opts.comp_level); err = __cmd_record(&record, argc, argv); out: |