aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavi Bangoria <ravi.bangoria@amd.com>2021-09-11 10:08:54 +0530
committerArnaldo Carvalho de Melo <acme@redhat.com>2021-09-15 17:54:52 -0300
commit3149733584c8f0ab828eada539df7aa488c023a9 (patch)
treea1a6810417e65abb1d6b83b0ebd1150970f4b9cd
parentMerge branch 'gcc-min-version-5.1' (make gcc-5.1 the minimum version) (diff)
downloadlinux-dev-3149733584c8f0ab828eada539df7aa488c023a9.tar.xz
linux-dev-3149733584c8f0ab828eada539df7aa488c023a9.zip
perf annotate: Add fusion logic for AMD microarchs
AMD family 15h and above microarchs fuse a subset of cmp/test/ALU instructions with branch instructions[1][2]. Add perf annotate fused instruction support for these microarchs. Before: │ testb $0x80,0x51(%rax) │ ┌──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax After: │ ┌──testb $0x80,0x51(%rax) │ ├──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax [1] https://bugzilla.kernel.org/attachment.cgi?id=298553 [2] https://bugzilla.kernel.org/attachment.cgi?id=298555 Committer testing: On a: $ grep -m1 "model name" /proc/cpuinfo model name : AMD Ryzen 9 3900X 12-Core Processor $ Samples: 44K of event 'cycles', 4000 Hz, Event count (approx.): 7533249650 _int_malloc /usr/lib64/libc-2.33.so [Percent: local period] Percent│ ┌──test %eax,%eax │ ├──jne 884 │ │↓ jmpq 943 │ │ nop │878:│ add $0x10,%rdx 0.64 │ │ add %eax,%eax 0.57 │ │↓ je cc9 0.77 │884:└─→test %esi,%eax │ ↑ je 878 │ mov 0x18(%rdx),%r15 Reported-by: Kim Phillips <kim.phillips@amd.com> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jin Yao <yao.jin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Link: https //lore.kernel.org/r/20210911043854.8373-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to '')
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c28
-rw-r--r--tools/perf/util/annotate.c1
2 files changed, 27 insertions, 2 deletions
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 24ea12ec7e02..305872692bfd 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -144,9 +144,32 @@ static struct ins x86__instructions[] = {
{ .name = "xorps", .ops = &mov_ops, },
};
-static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+static bool amd__ins_is_fused(struct arch *arch, const char *ins1,
const char *ins2)
{
+ if (strstr(ins2, "jmp"))
+ return false;
+
+ /* Family >= 15h supports cmp/test + branch fusion */
+ if (arch->family >= 0x15 && (strstarts(ins1, "test") ||
+ (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) {
+ return true;
+ }
+
+ /* Family >= 19h supports some ALU + branch fusion */
+ if (arch->family >= 0x19 && (strstarts(ins1, "add") ||
+ strstarts(ins1, "sub") || strstarts(ins1, "and") ||
+ strstarts(ins1, "inc") || strstarts(ins1, "dec") ||
+ strstarts(ins1, "or") || strstarts(ins1, "xor"))) {
+ return true;
+ }
+
+ return false;
+}
+
+static bool intel__ins_is_fused(struct arch *arch, const char *ins1,
+ const char *ins2)
+{
if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
return false;
@@ -184,6 +207,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
if (ret == 3) {
arch->family = family;
arch->model = model;
+ arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ?
+ amd__ins_is_fused :
+ intel__ins_is_fused;
return 0;
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0bae061b2d6d..b55f35485e43 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -183,7 +183,6 @@ static struct arch architectures[] = {
.init = x86__annotate_init,
.instructions = x86__instructions,
.nr_instructions = ARRAY_SIZE(x86__instructions),
- .ins_is_fused = x86__ins_is_fused,
.objdump = {
.comment_char = '#',
},