From 1f0e6edcd968ff19211245f7da6039e983aa51e5 Mon Sep 17 00:00:00 2001 From: Martin Liška Date: Thu, 11 Feb 2021 13:37:55 +0100 Subject: perf annotate: Fix jump parsing for C++ code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Considering the following testcase: int foo(int a, int b) { for (unsigned i = 0; i < 1000000000; i++) a += b; return a; } int main() { foo (3, 4); return 0; } 'perf annotate' displays: 86.52 │40055e: → ja 40056c <foo(int, int)+0x26> 13.37 │400560: mov -0x18(%rbp),%eax │400563: add %eax,-0x14(%rbp) │400566: addl $0x1,-0x4(%rbp) 0.11 │40056a: → jmp 400557 <foo(int, int)+0x11> │40056c: mov -0x14(%rbp),%eax │40056f: pop %rbp and the 'ja 40056c' does not link to the location in the function. This is caused by the fact that the comma is wrongly parsed: it is part of the function signature. With my patch I see: 86.52 │ ┌──ja 26 13.37 │ │ mov -0x18(%rbp),%eax │ │ add %eax,-0x14(%rbp) │ │ addl $0x1,-0x4(%rbp) 0.11 │ │↑ jmp 11 │26:└─→mov -0x14(%rbp),%eax and 'o' output prints: 86.52 │4005┌── ↓ ja 40056c 13.37 │4005│0: mov -0x18(%rbp),%eax │4005│3: add %eax,-0x14(%rbp) │4005│6: addl $0x1,-0x4(%rbp) 0.11 │4005│a: ↑ jmp 400557 │4005└─→ mov -0x14(%rbp),%eax In contrast, when compiling the very same file with gcc -x c, the parsing is fine because function arguments are not displayed: jmp 400543 Committer testing: Before: $ cat cpp_args_annotate.c int foo(int a, int b) { for (unsigned i = 0; i < 1000000000; i++) a += b; return a; } int main() { foo (3, 4); return 0; } $ gcc --version |& head -1 gcc (GCC) 10.2.1 20201125 (Red Hat 10.2.1-9) $ gcc -g cpp_args_annotate.c -o cpp_args_annotate $ perf record ./cpp_args_annotate [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.275 MB perf.data (7188 samples) ] $ perf annotate --stdio2 foo Samples: 7K of event 'cycles:u', 4000 Hz, Event count (approx.): 7468429289, [percent: local period] foo() /home/acme/c/cpp_args_annotate Percent 0000000000401106 : foo(): int foo(int a, int b) { push %rbp mov 
%rsp,%rbp mov %edi,-0x14(%rbp) mov %esi,-0x18(%rbp) for (unsigned i = 0; i < 1000000000; i++) movl $0x0,-0x4(%rbp) ↓ jmp 1d a += b; 13.45 13: mov -0x18(%rbp),%eax add %eax,-0x14(%rbp) for (unsigned i = 0; i < 1000000000; i++) addl $0x1,-0x4(%rbp) 0.09 1d: cmpl $0x3b9ac9ff,-0x4(%rbp) 86.46 ↑ jbe 13 return a; mov -0x14(%rbp),%eax } pop %rbp ← retq $ I.e. works for C, now lets switch to C++: $ g++ -g cpp_args_annotate.c -o cpp_args_annotate $ perf record ./cpp_args_annotate [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.268 MB perf.data (6976 samples) ] $ perf annotate --stdio2 foo Samples: 6K of event 'cycles:u', 4000 Hz, Event count (approx.): 7380681761, [percent: local period] foo() /home/acme/c/cpp_args_annotate Percent 0000000000401106 : foo(int, int): int foo(int a, int b) { push %rbp mov %rsp,%rbp mov %edi,-0x14(%rbp) mov %esi,-0x18(%rbp) for (unsigned i = 0; i < 1000000000; i++) movl $0x0,-0x4(%rbp) cmpl $0x3b9ac9ff,-0x4(%rbp) 86.53 → ja 40112c a += b; 13.32 mov -0x18(%rbp),%eax 0.00 add %eax,-0x14(%rbp) for (unsigned i = 0; i < 1000000000; i++) addl $0x1,-0x4(%rbp) 0.15 → jmp 401117 return a; mov -0x14(%rbp),%eax } pop %rbp ← retq $ Reproduced. 
Now with this patch: Reusing the C++ built binary, as we can see here: $ readelf -wi cpp_args_annotate | grep producer DW_AT_producer : (indirect string, offset: 0x2e): GNU C++14 10.2.1 20201125 (Red Hat 10.2.1-9) -mtune=generic -march=x86-64 -g $ And furthermore: $ file cpp_args_annotate cpp_args_annotate: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=4fe3cab260204765605ec630d0dc7a7e93c361a9, for GNU/Linux 3.2.0, with debug_info, not stripped $ perf buildid-list -i cpp_args_annotate 4fe3cab260204765605ec630d0dc7a7e93c361a9 $ perf buildid-list | grep cpp_args_annotate 4fe3cab260204765605ec630d0dc7a7e93c361a9 /home/acme/c/cpp_args_annotate $ It now works: $ perf annotate --stdio2 foo Samples: 6K of event 'cycles:u', 4000 Hz, Event count (approx.): 7380681761, [percent: local period] foo() /home/acme/c/cpp_args_annotate Percent 0000000000401106 : foo(int, int): int foo(int a, int b) { push %rbp mov %rsp,%rbp mov %edi,-0x14(%rbp) mov %esi,-0x18(%rbp) for (unsigned i = 0; i < 1000000000; i++) movl $0x0,-0x4(%rbp) 11: cmpl $0x3b9ac9ff,-0x4(%rbp) 86.53 ↓ ja 26 a += b; 13.32 mov -0x18(%rbp),%eax 0.00 add %eax,-0x14(%rbp) for (unsigned i = 0; i < 1000000000; i++) addl $0x1,-0x4(%rbp) 0.15 ↑ jmp 11 return a; 26: mov -0x14(%rbp),%eax } pop %rbp ← retq $ Signed-off-by: Martin Liška Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Slaby Link: http://lore.kernel.org/lkml/13e1a405-edf9-e4c2-4327-a9b454353730@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++++++ tools/perf/util/annotate.h | 1 + 2 files changed, 9 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ce8c07bc8c56..e60841b86d27 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -321,12 +321,18 @@ bool ins__is_call(const struct ins *ins) /* * Prevents from matching commas in the comment section, e.g.: * 
ffff200008446e70: b.cs ffff2000084470f4 // b.hs, b.nlast + * + * and skip comma as part of function arguments, e.g.: + * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> */ static inline const char *validate_comma(const char *c, struct ins_operands *ops) { if (ops->raw_comment && c > ops->raw_comment) return NULL; + if (ops->raw_func_start && c > ops->raw_func_start) + return NULL; + return c; } @@ -341,6 +347,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s u64 start, end; ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->raw_func_start = strchr(ops->raw, '<'); + c = validate_comma(c, ops); /* diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0a0cd4f32175..096cdaf21b01 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -32,6 +32,7 @@ struct ins { struct ins_operands { char *raw; char *raw_comment; + char *raw_func_start; struct { char *raw; char *name; -- cgit v1.2.3 From 96de68fff5ded8833bf5832658cb43c54f86ff6c Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 9 Feb 2021 14:51:48 +0000 Subject: perf symbols: Use (long) for iterator for bfd symbols GCC (GCC) 8.4.0 20200304 fails to build perf with: : util/symbol.c: In function 'dso__load_bfd_symbols': : util/symbol.c:1626:16: error: comparison of integer expressions of different signednes : for (i = 0; i < symbols_count; ++i) { : ^ : util/symbol.c:1632:16: error: comparison of integer expressions of different signednes : while (i + 1 < symbols_count && : ^ : util/symbol.c:1637:13: error: comparison of integer expressions of different signednes : if (i + 1 < symbols_count && : ^ : cc1: all warnings being treated as errors It's unlikely that the symtable will be that big, but the fix is a one-liner, and perf has CORE_CFLAGS += -Wextra, which together with CORE_CFLAGS += -Werror makes the build fail. Fixes: eac9a4342e54 ("perf symbols: Try reading the symbol table with libbfd") Signed-off-by: Dmitry Safonov Acked-by: Namhyung Kim Cc: Alexander 
Shishkin Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Jacek Caban Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Remi Bernon Link: http://lore.kernel.org/lkml/20210209145148.178702-1-dima@arista.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 64a039cbba1b..1645fb4ec9ed 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1561,12 +1561,11 @@ static int bfd2elf_binding(asymbol *symbol) int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) { int err = -1; - long symbols_size, symbols_count; + long symbols_size, symbols_count, i; asection *section; asymbol **symbols, *sym; struct symbol *symbol; bfd *abfd; - u_int i; u64 start, len; abfd = bfd_openr(dso->long_name, NULL); -- cgit v1.2.3 From 00a3423492bc90be99e529a64f13fdd80a0e8c0a Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Wed, 10 Feb 2021 14:17:38 -0500 Subject: perf symbols: Make dso__load_bfd_symbols() load PE files from debug cache only dso__load_bfd_symbols() attempts to load a DSO at its original path, then closes it and loads the file in the debug cache. This is incorrect. It should ignore the original file and work with only the debug cache. The original file may have changed or may not even exist, for example if the debug cache has been transferred to another machine via "perf archive". This fix makes it only load the file in the debug cache. Further notes from Nicholas: dso__load_bfd_symbols() is called in a loop from dso__load() for a variety of paths. These are generated by the various DSO_BINARY_TYPEs in the binary_type_symtab list at the top of util/symbol.c. In each case the debugfile passed to dso__load_bfd_symbols() is the path to try. One of those iterations (the first one I believe) passes the original path as the debugfile. 
If the file still exists at the original path, this is the one that ends up being used in case the debugcache was deleted or the PE file doesn't have a build-id. A later iteration (BUILD_ID_CACHE) passes debugfile as the file in the debugcache if it has a build-id. Even if the file was previously loaded at its original path, (if I understand correctly) this load will override it so the debugcache file ends up being used. Committer notes: So if it fails to find in the cache, it will eventually hope for the best and look at the path in the local filesystem, which in many cases is enough. At some point we need to switch from this "hope for the best" approach to one that warns the user that there is no guarantee, if no buildid is present, that just by looking at the pathname the symbolisation will work. Signed-off-by: Nicholas Fraser Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Frank Ch. Eigler Cc: Huw Davies Cc: Ian Rogers Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Remi Bernon Cc: Song Liu Cc: Tommi Rantala Cc: Ulrich Czekalla Link: http://lore.kernel.org/lkml/e58e1237-94ab-e1c9-a7b9-473531906954@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1645fb4ec9ed..89a1d5f2ad84 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1568,7 +1568,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) bfd *abfd; u64 start, len; - abfd = bfd_openr(dso->long_name, NULL); + abfd = bfd_openr(debugfile, NULL); if (!abfd) return -1; @@ -1585,12 +1585,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) if (section) dso->text_offset = section->vma - section->filepos; - bfd_close(abfd); - - abfd = bfd_openr(debugfile, NULL); - if (!abfd) - return -1; - if (!bfd_check_format(abfd, bfd_object)) { 
pr_debug2("%s: cannot read %s bfd file.\n", __func__, debugfile); -- cgit v1.2.3 From 77771a97011fa9146ccfaf2983a3a2885dc57b6f Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Wed, 10 Feb 2021 14:18:02 -0500 Subject: perf symbols: Fix return value when loading PE DSO The first time dso__load() was called on a PE file it always returned -1 (error). This caused the first call to map__find_symbol() to always fail on a PE file, so the first sample from each PE file always had symbol <unknown>. Subsequent samples succeed, however, because the DSO is already loaded. This fixes dso__load() to return 0 when successfully loading a DSO with libbfd. Fixes: eac9a4342e5447ca ("perf symbols: Try reading the symbol table with libbfd") Signed-off-by: Nicholas Fraser Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Frank Ch. Eigler Cc: Huw Davies Cc: Ian Rogers Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Remi Bernon Cc: Song Liu Cc: Tommi Rantala Cc: Ulrich Czekalla Link: http://lore.kernel.org/lkml/1671b43b-09c3-1911-dbf8-7f030242fbf7@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 89a1d5f2ad84..dfb7fb7b39a7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1860,8 +1860,10 @@ int dso__load(struct dso *dso, struct map *map) if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (bfdrc == 0) + if (bfdrc == 0) { + ret = 0; break; + } if (!is_reg || sirc < 0) continue; -- cgit v1.2.3 From 105f75ebf9519c239c96f9b16e16520f45fea72b Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Wed, 10 Feb 2021 14:26:46 +0800 Subject: perf probe: Fix kretprobe issue caused by GCC bug Perf failed to add a kretprobe event with debuginfo of vmlinux which is compiled by gcc with -fpatchable-function-entry option enabled. The same issue occurs with kernel modules. 
Issue: # perf probe -v 'kernel_clone%return $retval' ...... Writing event: r:probe/kernel_clone__return _text+599624 $retval Failed to write event: Invalid argument Error: Failed to add events. Reason: Invalid argument (Code: -22) # cat /sys/kernel/debug/tracing/error_log [156.75] trace_kprobe: error: Retprobe address must be an function entry Command: r:probe/kernel_clone__return _text+599624 $retval ^ # llvm-dwarfdump vmlinux |grep -A 10 -w 0x00df2c2b 0x00df2c2b: DW_TAG_subprogram DW_AT_external (true) DW_AT_name ("kernel_clone") DW_AT_decl_file ("/home/code/linux-next/kernel/fork.c") DW_AT_decl_line (2423) DW_AT_decl_column (0x07) DW_AT_prototyped (true) DW_AT_type (0x00dcd492 "pid_t") DW_AT_low_pc (0xffff800010092648) DW_AT_high_pc (0xffff800010092b9c) DW_AT_frame_base (DW_OP_call_frame_cfa) # cat /proc/kallsyms |grep kernel_clone ffff800010092640 T kernel_clone # readelf -s vmlinux |grep -i kernel_clone 183173: ffff800010092640 1372 FUNC GLOBAL DEFAULT 2 kernel_clone # objdump -d vmlinux |grep -A 10 -w \<kernel_clone\>: ffff800010092640 <kernel_clone>: ffff800010092640: d503201f nop ffff800010092644: d503201f nop ffff800010092648: d503233f paciasp ffff80001009264c: a9b87bfd stp x29, x30, [sp, #-128]! ffff800010092650: 910003fd mov x29, sp ffff800010092654: a90153f3 stp x19, x20, [sp, #16] The entry address of kernel_clone converted by debuginfo is _text+599624 (0x92648), which is consistent with the value of the DW_AT_low_pc attribute. But the symbolic address of kernel_clone from /proc/kallsyms is ffff800010092640. This issue is found on arm64, where -fpatchable-function-entry=2 is enabled when CONFIG_DYNAMIC_FTRACE_WITH_REGS=y. As the objdump disassembly of kernel_clone above shows, GCC generates 2 NOPs at the beginning of each function. kprobe_on_func_entry detects that (_text+599624) is not the entry address of the function, which leads to the failure of adding the kretprobe event. 
kprobe_on_func_entry ->_kprobe_addr ->kallsyms_lookup_size_offset ->arch_kprobe_on_func_entry // FALSE The cause of the issue is that the first instruction in the compile unit indicated by DW_AT_low_pc does not include NOPs. This issue exists in all gcc versions that support the -fpatchable-function-entry option. I have reported it to the GCC community: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98776 Currently arm64 and PA-RISC may enable the -fpatchable-function-entry option. The kernel compiled with clang does not have this issue. FIX: This GCC issue only causes the registration failure of the kretprobe event, which doesn't need debuginfo. So, stop using debuginfo for retprobe. The map will be used to query the probe function address instead. Signed-off-by: Jianlin Lv Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: clang-built-linux@googlegroups.com Cc: Frank Ch. Eigler Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Sumanth Korikkar Link: http://lore.kernel.org/lkml/20210210062646.2377995-1-Jianlin.Lv@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8eae2afff71a..a59d3268adb0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -894,6 +894,16 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct debuginfo *dinfo; int ntevs, ret = 0; + /* Workaround for gcc #98776 issue. + * Perf failed to add kretprobe event with debuginfo of vmlinux which is + * compiled by gcc with -fpatchable-function-entry option enabled. The + * same issue with kernel module. The retprobe doesn`t need debuginfo. + * This workaround solution use map to query the probe function address + * for retprobe event. 
+ */ + if (pev->point.retprobe) + return 0; + dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf); if (!dinfo) { if (need_dwarf) -- cgit v1.2.3