author    Ingo Molnar <mingo@kernel.org>  2019-05-18 10:24:43 +0200
committer Ingo Molnar <mingo@kernel.org>  2019-05-18 10:24:43 +0200
commit    62e1c09418fc16d27720b128275cac61367e2c1b (patch)
tree      4759aa6662b1398e2b93696ace58f6f309722b06 /tools/perf/util/mmap.c
parent    Merge tag 'media/v5.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media (diff)
parent    perf stat: Support 'percore' event qualifier (diff)
Merge tag 'perf-core-for-mingo-5.2-20190517' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf.data:

  Alexey Budankov:

  - Streaming compression of the perf ring buffer into
    PERF_RECORD_COMPRESSED user space records, resulting in ~3-5x
    perf.data file size reduction on a variety of tested workloads,
    which saves storage space on larger server systems where perf.data
    size can easily reach several tens or even hundreds of GiBs,
    especially when profiling with DWARF-based stacks and tracing of
    context switches.

perf record:

  Arnaldo Carvalho de Melo:

  - Improve --user-regs/--intr-regs suggestions to overcome errors.

perf annotate:

  Jin Yao:

  - Remove hist__account_cycles() from the callback, speeding up branch
    processing (perf record -b).

perf stat:

  - Add a 'percore' event qualifier, e.g.: -e cpu/event=0,umask=0x3,percore=1/,
    that sums up the event counts for both hardware threads in a core.

    We can already do this with --per-core, but it is often useful to do
    this together with other metrics that are collected per hardware
    thread. I.e. it is now possible to do this per event, and have it
    mixed with other events not aggregated by core.

core libraries:

  Donald Yandt:

  - Check for errors when doing fgets(/proc/version).

  Jiri Olsa:

  - Speed up 'perf report' for perf compiled with libunwind.

tools headers:

  Arnaldo Carvalho de Melo:

  - Update memcpy_64.S, x86's kvm.h and pt_regs.h.

arm64:

  Florian Fainelli:

  - Map Brahma-B53 CPUID to cortex-a53 events.

  - Add Cortex-A57 and Cortex-A72 events.

csky:

  Mao Han:

  - Add DWARF register mappings for libdw, allowing --call-graph=dwarf to
    work on the C-SKY arch.

x86:

  Andi Kleen/Kan Liang:

  - Add support for recording and printing XMM registers, available, for
    instance, on Icelake.

  Kan Liang:

  - Add uncore_upi (Intel's "Ultra Path Interconnect" events) JSON
    support. UPI replaced the Intel QuickPath Interconnect (QPI) in Xeon
    Skylake-SP.

Intel PT:

  Adrian Hunter:

  - Fix the instructions sampling rate.

  - Timestamp fixes.

  - Improve the exported-sql-viewer GUI, allowing, for instance, to
    copy'n'paste the trees, useful for e-mailing.

Documentation:

  Thomas Richter:

  - Add a description for 'perf --debug stderr=1', which redirects
    stderr to stdout.

libtraceevent:

  Tzvetomir Stoyanov:

  - Add man pages for the various APIs.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
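To illustrate the compression idea above in isolation: a minimal user-space
sketch, assuming libzstd (which the perf feature builds on); the framing
struct fake_compressed_hdr is purely hypothetical and is NOT the real
PERF_RECORD_COMPRESSED layout:

  /* build with: gcc demo.c -lzstd */
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <zstd.h>

  struct fake_compressed_hdr {	/* hypothetical framing, for illustration */
  	uint32_t type;		/* stand-in for a record type id */
  	uint32_t size;		/* compressed payload size in bytes */
  };

  int main(void)
  {
  	char chunk[4096];	/* pretend this came out of the ring buffer */
  	memset(chunk, 'A', sizeof(chunk));

  	size_t cap = ZSTD_compressBound(sizeof(chunk));
  	char *out = malloc(sizeof(struct fake_compressed_hdr) + cap);
  	if (out == NULL)
  		return 1;

  	/* compress the chunk right after the framing header */
  	size_t n = ZSTD_compress(out + sizeof(struct fake_compressed_hdr),
  				 cap, chunk, sizeof(chunk), 1);
  	if (ZSTD_isError(n))
  		return 1;

  	struct fake_compressed_hdr *hdr = (void *)out;
  	hdr->type = 0x81;	/* hypothetical id */
  	hdr->size = (uint32_t)n;

  	printf("compressed %zu bytes into %zu (%.1fx)\n",
  	       sizeof(chunk), n, (double)sizeof(chunk) / n);
  	free(out);
  	return 0;
  }

The ~3-5x figure quoted above will of course vary with how compressible the
sampled data actually is.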
Diffstat (limited to 'tools/perf/util/mmap.c')
-rw-r--r--  tools/perf/util/mmap.c | 102
1 file changed, 27 insertions(+), 75 deletions(-)
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index ef3d79b2c90b..868c0b0e909c 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -157,6 +157,10 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
 }
 
 #ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_enabled(struct perf_mmap *map)
+{
+	return map->aio.nr_cblocks > 0;
+}
 
 #ifdef HAVE_LIBNUMA_SUPPORT
 static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
@@ -198,7 +202,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi
 
 	return 0;
 }
-#else
+#else /* !HAVE_LIBNUMA_SUPPORT */
 static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
 {
 	map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
@@ -285,81 +289,12 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
 	zfree(&map->aio.cblocks);
 	zfree(&map->aio.aiocb);
 }
-
-int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
-			int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
-			off_t *off)
+#else /* !HAVE_AIO_SUPPORT */
+static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
 {
-	u64 head = perf_mmap__read_head(md);
-	unsigned char *data = md->base + page_size;
-	unsigned long size, size0 = 0;
-	void *buf;
-	int rc = 0;
-
-	rc = perf_mmap__read_init(md);
-	if (rc < 0)
-		return (rc == -EAGAIN) ? 0 : -1;
-
-	/*
-	 * md->base data is copied into the md->data[idx] buffer to
-	 * release space in the kernel buffer as fast as possible,
-	 * through perf_mmap__consume() below.
-	 *
-	 * That lets the kernel proceed with storing more
-	 * profiling data into the kernel buffer earlier than other
-	 * per-cpu kernel buffers are handled.
-	 *
-	 * Copying can be done in two steps in case the chunk of
-	 * profiling data crosses the upper bound of the kernel buffer.
-	 * In this case we first move the data from md->start
-	 * till the upper bound and then the remainder from the
-	 * beginning of the kernel buffer till the end of
-	 * the data chunk.
-	 */
-
-	size = md->end - md->start;
-
-	if ((md->start & md->mask) + size != (md->end & md->mask)) {
-		buf = &data[md->start & md->mask];
-		size = md->mask + 1 - (md->start & md->mask);
-		md->start += size;
-		memcpy(md->aio.data[idx], buf, size);
-		size0 = size;
-	}
-
-	buf = &data[md->start & md->mask];
-	size = md->end - md->start;
-	md->start += size;
-	memcpy(md->aio.data[idx] + size0, buf, size);
-
-	/*
-	 * Increment md->refcount to guard the md->data[idx] buffer
-	 * from premature deallocation, because the md object can be
-	 * released earlier than the aio write request started on
-	 * mmap->data[idx] completes.
-	 *
-	 * perf_mmap__put() is done at record__aio_complete()
-	 * after the started request completes.
-	 */
-	perf_mmap__get(md);
-
-	md->prev = head;
-	perf_mmap__consume(md);
-
-	rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
-	if (!rc) {
-		*off += size0 + size;
-	} else {
-		/*
-		 * Decrement md->refcount back if the aio write
-		 * operation failed to start.
-		 */
-		perf_mmap__put(md);
-	}
-
-	return rc;
+	return 0;
 }
-#else
+
 static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
 			       struct mmap_params *mp __maybe_unused)
 {
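The two-step copy that the removed perf_mmap__aio_push() performed is a
generic power-of-two ring-buffer technique. A minimal sketch of just that
step, with an invented name (ring_copy_out) and assuming mask == size - 1
for a power-of-two buffer, start/end being free-running offsets:

  #include <stddef.h>
  #include <string.h>

  static void ring_copy_out(void *dst, const unsigned char *ring,
  			    size_t mask, size_t start, size_t end)
  {
  	size_t size = end - start;
  	size_t size0 = 0;

  	/* Chunk crosses the upper bound: copy the tail first... */
  	if ((start & mask) + size != (end & mask)) {
  		size0 = mask + 1 - (start & mask);
  		memcpy(dst, &ring[start & mask], size0);
  		start += size0;
  	}

  	/* ...then the remainder from the bottom of the buffer. */
  	memcpy((unsigned char *)dst + size0, &ring[start & mask],
  	       end - start);
  }

The refcount dance around the copy (perf_mmap__get() before the aio write
is queued, perf_mmap__put() on completion or on failure to start) is the
usual pattern for keeping a buffer alive across an asynchronous operation.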
@@ -374,6 +309,10 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
 void perf_mmap__munmap(struct perf_mmap *map)
 {
 	perf_mmap__aio_munmap(map);
+	if (map->data != NULL) {
+		munmap(map->data, perf_mmap__mmap_len(map));
+		map->data = NULL;
+	}
 	if (map->base != NULL) {
 		munmap(map->base, perf_mmap__mmap_len(map));
 		map->base = NULL;
@@ -442,6 +381,19 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
 
 	map->flush = mp->flush;
 
+	map->comp_level = mp->comp_level;
+
+	if (map->comp_level && !perf_mmap__aio_enabled(map)) {
+		map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+				 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+		if (map->data == MAP_FAILED) {
+			pr_debug2("failed to mmap data buffer, error %d\n",
+				  errno);
+			map->data = NULL;
+			return -1;
+		}
+	}
+
 	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
 				&mp->auxtrace_mp, map->base, fd))
 		return -1;
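When AIO is not in use, the hunk above stages compression through a plain
anonymous mapping. The same allocation pattern in isolation, with invented
helper names (alloc_data_buf/free_data_buf); note the hunk passes 0 as the
fd, which Linux accepts with MAP_ANONYMOUS, while -1 is the conventional
portable value used here:

  #include <stddef.h>
  #include <stdio.h>
  #include <sys/mman.h>

  static void *alloc_data_buf(size_t len)
  {
  	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
  			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  	if (buf == MAP_FAILED) {
  		perror("mmap");	/* mirrors the pr_debug2() path above */
  		return NULL;
  	}
  	return buf;
  }

  static void free_data_buf(void *buf, size_t len)
  {
  	if (buf)
  		munmap(buf, len);	/* mirrors perf_mmap__munmap() */
  }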
@@ -540,7 +492,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
 
 	rc = perf_mmap__read_init(md);
 	if (rc < 0)
-		return (rc == -EAGAIN) ? 0 : -1;
+		return (rc == -EAGAIN) ? 1 : -1;
 
 	size = md->end - md->start;