diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-02-09 13:51:16 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-02-09 13:51:20 +1100 |
commit | bd0044e8d5ddf308e4ac09734192418767700b82 (patch) | |
tree | 50fa4fea46466b3acd8e1bcca3ac69a9d323c6f7 /tools | |
parent | 31fab8ad1ce0b55ddf9a356b374e3b556dc02a2b (diff) | |
parent | 5bf728f0221887fe3ddea3143c812404b85003d1 (diff) |
Merge remote-tracking branch 'tip/auto-latest'
Diffstat (limited to 'tools')
58 files changed, 1431 insertions, 209 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index cddd5d06e1cb..3603556fa0d9 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -280,6 +280,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index 37fee272618f..14458658e988 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,8 @@ #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 #define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_GDTR_IDTR 46 +#define EXIT_REASON_LDTR_TR 47 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 @@ -113,6 +115,8 @@ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ + { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ { EXIT_REASON_INVEPT, "INVEPT" }, \ @@ -129,6 +133,7 @@ { EXIT_REASON_XRSTORS, "XRSTORS" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 +#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 84e6b35da4bd..e6cd62b1264b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -28,6 +28,7 @@ #include <fcntl.h> #include <errno.h> #include <asm/unistd.h> +#include <linux/err.h> #include <linux/kernel.h> #include <linux/bpf.h> #include <linux/list.h> @@ -779,7 +780,7 @@ static int bpf_program__collect_reloc(struct bpf_program *prog, size_t nr_maps, GElf_Shdr *shdr, Elf_Data *data, Elf_Data *symbols, - int maps_shndx) + int maps_shndx, struct bpf_map *maps) { int i, nrels; @@ -829,7 +830,15 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } - map_idx = sym.st_value / sizeof(struct bpf_map_def); + /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */ + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].offset == sym.st_value) { + pr_debug("relocation: find map %zd (%s) for insn %u\n", + map_idx, maps[map_idx].name, insn_idx); + break; + } + } + if (map_idx >= nr_maps) { pr_warning("bpf relocation: map_idx %d large than %d\n", (int)map_idx, (int)nr_maps - 1); @@ -953,7 +962,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) err = bpf_program__collect_reloc(prog, nr_maps, shdr, data, obj->efile.symbols, - obj->efile.maps_shndx); + obj->efile.maps_shndx, + obj->maps); if (err) return err; } @@ -1419,37 +1429,33 @@ static void bpf_program__set_type(struct bpf_program *prog, prog->type = type; } -int bpf_program__set_tracepoint(struct bpf_program *prog) -{ - if (!prog) - return -EINVAL; - bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); - return 0; -} - -int bpf_program__set_kprobe(struct bpf_program *prog) -{ - if (!prog) - return -EINVAL; - bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE); - return 0; -} - static bool bpf_program__is_type(struct bpf_program *prog, enum bpf_prog_type type) { return prog ? (prog->type == type) : false; } -bool bpf_program__is_tracepoint(struct bpf_program *prog) -{ - return bpf_program__is_type(prog, BPF_PROG_TYPE_TRACEPOINT); -} - -bool bpf_program__is_kprobe(struct bpf_program *prog) -{ - return bpf_program__is_type(prog, BPF_PROG_TYPE_KPROBE); -} +#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ +int bpf_program__set_##NAME(struct bpf_program *prog) \ +{ \ + if (!prog) \ + return -EINVAL; \ + bpf_program__set_type(prog, TYPE); \ + return 0; \ +} \ + \ +bool bpf_program__is_##NAME(struct bpf_program *prog) \ +{ \ + return bpf_program__is_type(prog, TYPE); \ +} \ + +BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); +BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); +BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); +BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); +BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); +BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); +BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); int bpf_map__fd(struct bpf_map *map) { @@ -1537,3 +1543,10 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) } return ERR_PTR(-ENOENT); } + +long libbpf_get_error(const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + return 0; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index a5a8b86a06fe..4014d1ba5e3d 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -22,8 +22,8 @@ #define __BPF_LIBBPF_H #include <stdio.h> +#include <stdint.h> #include <stdbool.h> -#include <linux/err.h> #include <sys/types.h> // for size_t enum libbpf_errno { @@ -174,11 +174,21 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n); /* * Adjust type of bpf program. Default is kprobe. */ +int bpf_program__set_socket_filter(struct bpf_program *prog); int bpf_program__set_tracepoint(struct bpf_program *prog); int bpf_program__set_kprobe(struct bpf_program *prog); +int bpf_program__set_sched_cls(struct bpf_program *prog); +int bpf_program__set_sched_act(struct bpf_program *prog); +int bpf_program__set_xdp(struct bpf_program *prog); +int bpf_program__set_perf_event(struct bpf_program *prog); +bool bpf_program__is_socket_filter(struct bpf_program *prog); bool bpf_program__is_tracepoint(struct bpf_program *prog); bool bpf_program__is_kprobe(struct bpf_program *prog); +bool bpf_program__is_sched_cls(struct bpf_program *prog); +bool bpf_program__is_sched_act(struct bpf_program *prog); +bool bpf_program__is_xdp(struct bpf_program *prog); +bool bpf_program__is_perf_event(struct bpf_program *prog); /* * We don't need __attribute__((packed)) now since it is @@ -224,4 +234,6 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); void *bpf_map__priv(struct bpf_map *map); +long libbpf_get_error(const void *ptr); + #endif diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 11c3be3bcce7..f054ca1b899d 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -1,6 +1,7 @@ #ifndef __SUBCMD_PARSE_OPTIONS_H #define __SUBCMD_PARSE_OPTIONS_H +#include <linux/kernel.h> #include <stdbool.h> #include <stdint.h> @@ -132,32 +133,32 @@ struct option { #define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) } #define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } #define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } -#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) } +#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) } #define OPT_STRING_OPTARG(s, l, v, a, h, d) \ { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ - .value = check_vtype(v, const char **), (a), .help = (h), \ + .value = check_vtype(v, const char **), .argh =(a), .help = (h), \ .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) } #define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \ { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ - .value = check_vtype(v, const char **), (a), .help = (h), \ + .value = check_vtype(v, const char **), .argh = (a), .help = (h), \ .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \ .set = check_vtype(os, bool *)} -#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} +#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} #define OPT_DATE(s, l, v, h) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) } #define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } #define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\ - .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\ + .value = (v), .arg = (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\ .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG} #define OPT_CALLBACK_OPTARG(s, l, v, d, a, h, f) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), \ - .value = (v), (a), .help = (h), .callback = (f), \ + .value = (v), .argh = (a), .help = (h), .callback = (f), \ .flags = PARSE_OPT_OPTARG, .data = (d) } /* parse_options() will filter out the processed options and leave the diff --git a/tools/perf/Build b/tools/perf/Build index b12d5d1666e3..9b79f8d7db50 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -3,10 +3,12 @@ perf-y += builtin-annotate.o perf-y += builtin-config.o perf-y += builtin-diff.o perf-y += builtin-evlist.o +perf-y += builtin-ftrace.o perf-y += builtin-help.o perf-y += builtin-sched.o perf-y += builtin-buildid-list.o perf-y += builtin-buildid-cache.o +perf-y += builtin-kallsyms.o perf-y += builtin-list.o perf-y += builtin-record.o perf-y += builtin-report.o @@ -39,8 +41,7 @@ CFLAGS_builtin-help.o += $(paths) CFLAGS_builtin-timechart.o += $(paths) CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" \ -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" \ - -DPREFIX="BUILD_STR($(prefix_SQ))" \ - -include $(OUTPUT)PERF-VERSION-FILE + -DPREFIX="BUILD_STR($(prefix_SQ))" CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))" CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 3f06730c7f47..2da07e51e119 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -248,7 +248,7 @@ output fields set for caheline offsets output: Code address, Code symbol, Shared Object, Source line dso - coalesced by shared object -By default the coalescing is setup with 'pid,tid,iaddr'. +By default the coalescing is setup with 'pid,iaddr'. STDIO OUTPUT ------------ diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt new file mode 100644 index 000000000000..2d96de6132a9 --- /dev/null +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -0,0 +1,36 @@ +perf-ftrace(1) +============= + +NAME +---- +perf-ftrace - simple wrapper for kernel's ftrace functionality + + +SYNOPSIS +-------- +[verse] +'perf ftrace' <command> + +DESCRIPTION +----------- +The 'perf ftrace' command is a simple wrapper of kernel's ftrace +functionality. It only supports single thread tracing currently and +just reads trace_pipe in text and then write it to stdout. + +The following options apply to perf ftrace. + +OPTIONS +------- + +-t:: +--tracer=:: + Tracer to use: function_graph or function. + +-v:: +--verbose=:: + Verbosity level. + + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-trace[1] diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt new file mode 100644 index 000000000000..954ea9e21236 --- /dev/null +++ b/tools/perf/Documentation/perf-kallsyms.txt @@ -0,0 +1,24 @@ +perf-kallsyms(1) +============== + +NAME +---- +perf-kallsyms - Searches running kernel for symbols + +SYNOPSIS +-------- +[verse] +'perf kallsyms <options> symbol_name[,symbol_name...]' + +DESCRIPTION +----------- +This command searches the running kernel kallsyms file for the given symbol(s) +and prints information about it, including the DSO, the kallsyms begin/end +addresses and the addresses in the ELF kallsyms symbol table (for symbols in +modules). + +OPTIONS +------- +-v:: +--verbose=:: + Increase verbosity level, showing details about symbol table loading, etc. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5054d9147f0f..27256bc68eda 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -421,9 +421,19 @@ Configure all used events to run in user space. --timestamp-filename Append timestamp to output file name. ---switch-output:: +--switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one -when receiving a SIGUSR2. +based on 'mode' value: + "signal" - when receiving a SIGUSR2 (default value) or + <size> - when reaching the size threshold, size is expected to + be a number with appended unit character - B/K/M/G + <time> - when reaching the time threshold, size is expected to + be a number with appended unit character - s/m/h/d + + Note: the precision of the size threshold hugely depends + on your configuration - the number and size of your ring + buffers (-m). It is generally more precise for higher sizes + (like >5M), for lower values expect different sizes. A possible use case is to, given an external event, slice the perf.data file that gets then processed, possibly via a perf script, to decide if that diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 76173969ab80..d33deddb0146 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -143,6 +143,8 @@ OPTIONS for 'perf sched timehist' stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. +--state:: + Show task state when it switched out. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 5dc5c6a09ac4..4ed5f239ba7d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -36,7 +36,7 @@ There are several variants of perf script: 'perf script report <script> [args]' to run and display the results of <script>. <script> is the name displayed in the output of 'perf - trace --list' i.e. the actual script name minus any language + script --list' i.e. the actual script name minus any language extension. The perf.data output from a previous run of 'perf script record <script>' is used and should be present for this command to succeed. [args] refers to the (mainly optional) args expected by @@ -76,7 +76,7 @@ OPTIONS Any command you can specify in a shell. -D:: ---dump-raw-script=:: +--dump-raw-trace=:: Display verbose dump of the trace data. -L:: diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 781b019751a4..afd728672b6f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -35,7 +35,10 @@ OPTIONS -e:: --expr:: - List of syscalls to show, currently only syscall names. +--event:: + List of syscalls and other perf events (tracepoints, HW cache events, + etc) to show. + See 'perf list' for a complete list of events. Prefixing with ! shows all syscalls but the ones specified. You may need to escape it. @@ -135,9 +138,6 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --kernel-syscall-graph:: Show the kernel callchains on the syscall exit path. ---event:: - Trace other events, see 'perf list' for a complete list. - --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. Note that at this point diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 76c84f0eec52..03cf947755b9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -291,8 +291,10 @@ else endif endif ifneq ($(feature-dwarf), 1) - msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); - NO_DWARF := 1 + ifndef NO_DWARF + msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); + NO_DWARF := 1 + endif else ifneq ($(feature-dwarf_getlocations), 1) msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157); diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8bb16aa9d661..4da19b6ba94a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -661,6 +661,7 @@ ifndef NO_PERF_READ_VDSOX32 endif ifndef NO_JVMTI $(call QUIET_INSTALL, $(LIBJVMTI)) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \ $(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)'; endif $(call QUIET_INSTALL, libexec) \ diff --git a/tools/perf/arch/arm64/include/dwarf-regs-table.h b/tools/perf/arch/arm64/include/dwarf-regs-table.h index 26759363f921..36e375f5a211 100644 --- a/tools/perf/arch/arm64/include/dwarf-regs-table.h +++ b/tools/perf/arch/arm64/include/dwarf-regs-table.h @@ -2,12 +2,12 @@ /* This is included in perf/util/dwarf-regs.c */ static const char * const aarch64_regstr_tbl[] = { - "%r0", "%r1", "%r2", "%r3", "%r4", - "%r5", "%r6", "%r7", "%r8", "%r9", - "%r10", "%r11", "%r12", "%r13", "%r14", - "%r15", "%r16", "%r17", "%r18", "%r19", - "%r20", "%r21", "%r22", "%r23", "%r24", - "%r25", "%r26", "%r27", "%r28", "%r29", + "%x0", "%x1", "%x2", "%x3", "%x4", + "%x5", "%x6", "%x7", "%x8", "%x9", + "%x10", "%x11", "%x12", "%x13", "%x14", + "%x15", "%x16", "%x17", "%x18", "%x19", + "%x20", "%x21", "%x22", "%x23", "%x24", + "%x25", "%x26", "%x27", "%x28", "%x29", "%lr", "%sp", }; #endif diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f8ca7a4ebabc..e2b21723bbf8 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -58,7 +58,7 @@ struct c2c_hist_entry { struct hist_entry he; }; -static char const *coalesce_default = "pid,tid,iaddr"; +static char const *coalesce_default = "pid,iaddr"; struct perf_c2c { struct perf_tool tool; @@ -2476,6 +2476,7 @@ static int build_cl_output(char *cl_sort, bool no_source) "mean_rmt," "mean_lcl," "mean_load," + "tot_recs," "cpucnt,", add_sym ? "symbol," : "", add_dso ? "dso," : "", diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c new file mode 100644 index 000000000000..d05658d2b8f1 --- /dev/null +++ b/tools/perf/builtin-ftrace.c @@ -0,0 +1,243 @@ +/* + * builtin-ftrace.c + * + * Copyright (c) 2013 LG Electronics, Namhyung Kim <namhyung@kernel.org> + * + * Released under the GPL v2. + */ + +#include "builtin.h" +#include "perf.h" + +#include <unistd.h> +#include <signal.h> + +#include "debug.h" +#include <subcmd/parse-options.h> +#include "evlist.h" +#include "target.h" +#include "thread_map.h" + + +#define DEFAULT_TRACER "function_graph" + +struct perf_ftrace { + struct perf_evlist *evlist; + struct target target; + const char *tracer; +}; + +static bool done; + +static void sig_handler(int sig __maybe_unused) +{ + done = true; +} + +/* + * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since + * we asked by setting its exec_error to the function below, + * ftrace__workload_exec_failed_signal. + * + * XXX We need to handle this more appropriately, emitting an error, etc. + */ +static void ftrace__workload_exec_failed_signal(int signo __maybe_unused, + siginfo_t *info __maybe_unused, + void *ucontext __maybe_unused) +{ + /* workload_exec_errno = info->si_value.sival_int; */ + done = true; +} + +static int write_tracing_file(const char *name, const char *val) +{ + char *file; + int fd, ret = -1; + ssize_t size = strlen(val); + + file = get_tracing_file(name); + if (!file) { + pr_debug("cannot get tracing file: %s\n", name); + return -1; + } + + fd = open(file, O_WRONLY); + if (fd < 0) { + pr_debug("cannot open tracing file: %s\n", name); + goto out; + } + + if (write(fd, val, size) == size) + ret = 0; + else + pr_debug("write '%s' to tracing/%s failed\n", val, name); + + close(fd); +out: + put_tracing_file(file); + return ret; +} + +static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) +{ + if (write_tracing_file("tracing_on", "0") < 0) + return -1; + + if (write_tracing_file("current_tracer", "nop") < 0) + return -1; + + if (write_tracing_file("set_ftrace_pid", " ") < 0) + return -1; + + return 0; +} + +static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) +{ + char *trace_file; + int trace_fd; + char *trace_pid; + char buf[4096]; + struct pollfd pollfd = { + .events = POLLIN, + }; + + if (geteuid() != 0) { + pr_err("ftrace only works for root!\n"); + return -1; + } + + if (argc < 1) + return -1; + + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + + reset_tracing_files(ftrace); + + /* reset ftrace buffer */ + if (write_tracing_file("trace", "0") < 0) + goto out; + + if (perf_evlist__prepare_workload(ftrace->evlist, &ftrace->target, + argv, false, ftrace__workload_exec_failed_signal) < 0) + goto out; + + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { + pr_err("failed to set current_tracer to %s\n", ftrace->tracer); + goto out; + } + + if (asprintf(&trace_pid, "%d", thread_map__pid(ftrace->evlist->threads, 0)) < 0) { + pr_err("failed to allocate pid string\n"); + goto out; + } + + if (write_tracing_file("set_ftrace_pid", trace_pid) < 0) { + pr_err("failed to set pid: %s\n", trace_pid); + goto out_free_pid; + } + + trace_file = get_tracing_file("trace_pipe"); + if (!trace_file) { + pr_err("failed to open trace_pipe\n"); + goto out_free_pid; + } + + trace_fd = open(trace_file, O_RDONLY); + + put_tracing_file(trace_file); + + if (trace_fd < 0) { + pr_err("failed to open trace_pipe\n"); + goto out_free_pid; + } + + fcntl(trace_fd, F_SETFL, O_NONBLOCK); + pollfd.fd = trace_fd; + + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_close_fd; + } + + perf_evlist__start_workload(ftrace->evlist); + + while (!done) { + if (poll(&pollfd, 1, -1) < 0) + break; + + if (pollfd.revents & POLLIN) { + int n = read(trace_fd, buf, sizeof(buf)); + if (n < 0) + break; + if (fwrite(buf, n, 1, stdout) != 1) + break; + } + } + + write_tracing_file("tracing_on", "0"); + + /* read remaining buffer contents */ + while (true) { + int n = read(trace_fd, buf, sizeof(buf)); + if (n <= 0) + break; + if (fwrite(buf, n, 1, stdout) != 1) + break; + } + +out_close_fd: + close(trace_fd); +out_free_pid: + free(trace_pid); +out: + reset_tracing_files(ftrace); + + return done ? 0 : -1; +} + +int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) +{ + int ret; + struct perf_ftrace ftrace = { + .tracer = "function_graph", + .target = { .uid = UINT_MAX, }, + }; + const char * const ftrace_usage[] = { + "perf ftrace [<options>] <command>", + "perf ftrace [<options>] -- <command> [<options>]", + NULL + }; + const struct option ftrace_options[] = { + OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", + "tracer to use: function_graph(default) or function"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose"), + OPT_END() + }; + + argc = parse_options(argc, argv, ftrace_options, ftrace_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (!argc) + usage_with_options(ftrace_usage, ftrace_options); + + ftrace.evlist = perf_evlist__new(); + if (ftrace.evlist == NULL) + return -ENOMEM; + + ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target); + if (ret < 0) + goto out_delete_evlist; + + if (ftrace.tracer == NULL) + ftrace.tracer = DEFAULT_TRACER; + + ret = __cmd_ftrace(&ftrace, argc, argv); + +out_delete_evlist: + perf_evlist__delete(ftrace.evlist); + + return ret; +} diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 3bdb2c78a21b..93da24a638be 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -434,7 +434,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) const char * const builtin_help_subcommands[] = { "buildid-cache", "buildid-list", "diff", "evlist", "help", "list", "record", "report", "bench", "stat", "timechart", "top", "annotate", - "script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data", + "script", "sched", "kallsyms", "kmem", "lock", "kvm", "test", "inject", "mem", "data", #ifdef HAVE_LIBELF_SUPPORT "probe", #endif diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c new file mode 100644 index 000000000000..224bfc454b4a --- /dev/null +++ b/tools/perf/builtin-kallsyms.c @@ -0,0 +1,67 @@ +/* + * builtin-kallsyms.c + * + * Builtin command: Look for a symbol in the running kernel and its modules + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Released under the GPL v2. (and only v2, not any later version) + */ +#include "builtin.h" +#include <linux/compiler.h> +#include <subcmd/parse-options.h> +#include "debug.h" +#include "machine.h" +#include "symbol.h" + +static int __cmd_kallsyms(int argc, const char **argv) +{ + int i; + struct machine *machine = machine__new_kallsyms(); + + if (machine == NULL) { + pr_err("Couldn't read /proc/kallsyms\n"); + return -1; + } + + for (i = 0; i < argc; ++i) { + struct map *map; + struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map); + + if (symbol == NULL) { + printf("%s: not found\n", argv[i]); + continue; + } + + printf("%s: %s %s %#" PRIx64 "-%#" PRIx64 " (%#" PRIx64 "-%#" PRIx64")\n", + symbol->name, map->dso->short_name, map->dso->long_name, + map->unmap_ip(map, symbol->start), map->unmap_ip(map, symbol->end), + symbol->start, symbol->end); + } + + machine__delete(machine); + return 0; +} + +int cmd_kallsyms(int argc, const char **argv, const char *prefix __maybe_unused) +{ + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_END() + }; + const char * const kallsyms_usage[] = { + "perf kallsyms [<options>] symbol_name", + NULL + }; + + argc = parse_options(argc, argv, options, kallsyms_usage, 0); + if (argc < 1) + usage_with_options(kallsyms_usage, options); + + symbol_conf.sort_by_name = true; + symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); + if (symbol__init(NULL) < 0) + return -1; + + return __cmd_kallsyms(argc, argv); +} diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4ec10e9427d9..33a9eaaf9db4 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -46,6 +46,15 @@ #include <asm/bug.h> #include <linux/time64.h> +struct switch_output { + bool enabled; + bool signal; + unsigned long size; + unsigned long time; + const char *str; + bool set; +}; + struct record { struct perf_tool tool; struct record_opts opts; @@ -62,10 +71,33 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool timestamp_filename; - bool switch_output; + struct switch_output switch_output; unsigned long long samples; }; +static volatile int auxtrace_record__snapshot_started; +static DEFINE_TRIGGER(auxtrace_snapshot_trigger); +static DEFINE_TRIGGER(switch_output_trigger); + +static bool switch_output_signal(struct record *rec) +{ + return rec->switch_output.signal && + trigger_is_ready(&switch_output_trigger); +} + +static bool switch_output_size(struct record *rec) +{ + return rec->switch_output.size && + trigger_is_ready(&switch_output_trigger) && + (rec->bytes_written >= rec->switch_output.size); +} + +static bool switch_output_time(struct record *rec) +{ + return rec->switch_output.time && + trigger_is_ready(&switch_output_trigger); +} + static int record__write(struct record *rec, void *bf, size_t size) { if (perf_data_file__write(rec->session->file, bf, size) < 0) { @@ -74,6 +106,10 @@ static int record__write(struct record *rec, void *bf, size_t size) } rec->bytes_written += size; + + if (switch_output_size(rec)) + trigger_hit(&switch_output_trigger); + return 0; } @@ -193,10 +229,6 @@ static volatile int done; static volatile int signr = -1; static volatile int child_finished; -static volatile int auxtrace_record__snapshot_started; -static DEFINE_TRIGGER(auxtrace_snapshot_trigger); -static DEFINE_TRIGGER(switch_output_trigger); - static void sig_handler(int sig) { if (sig == SIGCHLD) @@ -712,6 +744,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, } static void snapshot_sig_handler(int sig); +static void alarm_sig_handler(int sig); int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, @@ -842,11 +875,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); - if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) { + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) trigger_on(&auxtrace_snapshot_trigger); - if (rec->switch_output) + if (rec->switch_output.enabled) trigger_on(&switch_output_trigger); } else { signal(SIGUSR2, SIG_IGN); @@ -1043,6 +1076,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = fd; goto out_child; } + + /* re-arm the alarm */ + if (rec->switch_output.time) + alarm(rec->switch_output.time); } if (hits == rec->samples) { @@ -1352,6 +1389,78 @@ out_free: return ret; } +static void switch_output_size_warn(struct record *rec) +{ + u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); + struct switch_output *s = &rec->switch_output; + + wakeup_size /= 2; + + if (s->size < wakeup_size) { + char buf[100]; + + unit_number__scnprintf(buf, sizeof(buf), wakeup_size); + pr_warning("WARNING: switch-output data size lower than " + "wakeup kernel buffer size (%s) " + "expect bigger perf.data sizes\n", buf); + } +} + +static int switch_output_setup(struct record *rec) +{ + struct switch_output *s = &rec->switch_output; + static struct parse_tag tags_size[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + static struct parse_tag tags_time[] = { + { .tag = 's', .mult = 1 }, + { .tag = 'm', .mult = 60 }, + { .tag = 'h', .mult = 60*60 }, + { .tag = 'd', .mult = 60*60*24 }, + { .tag = 0 }, + }; + unsigned long val; + + if (!s->set) + return 0; + + if (!strcmp(s->str, "signal")) { + s->signal = true; + pr_debug("switch-output with SIGUSR2 signal\n"); + goto enabled; + } + + val = parse_tag_value(s->str, tags_size); + if (val != (unsigned long) -1) { + s->size = val; + pr_debug("switch-output with %s size threshold\n", s->str); + goto enabled; + } + + val = parse_tag_value(s->str, tags_time); + if (val != (unsigned long) -1) { + s->time = val; + pr_debug("switch-output with %s time threshold (%lu seconds)\n", + s->str, s->time); + goto enabled; + } + + return -1; + +enabled: + rec->timestamp_filename = true; + s->enabled = true; + + if (s->size && !rec->opts.no_buffering) + switch_output_size_warn(rec); + + return 0; +} + static const char * const __record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -1519,8 +1628,10 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), - OPT_BOOLEAN(0, "switch-output", &record.switch_output, - "Switch output when receive SIGUSR2"), + OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, + &record.switch_output.set, "signal,size,time", + "Switch output when receive SIGUSR2 or cross size,time threshold", + "signal"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), OPT_END() @@ -1578,8 +1689,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } - if (rec->switch_output) - rec->timestamp_filename = true; + if (switch_output_setup(rec)) { + parse_options_usage(record_usage, record_options, "switch-output", 0); + return -EINVAL; + } + + if (rec->switch_output.time) { + signal(SIGALRM, alarm_sig_handler); + alarm(rec->switch_output.time); + } if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); @@ -1629,7 +1747,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); - } else if (rec->switch_output) { + } else if (rec->switch_output.enabled) { /* * In 'perf record --switch-output', disable buildid * generation by default to reduce data file switching @@ -1721,6 +1839,8 @@ out: static void snapshot_sig_handler(int sig __maybe_unused) { + struct record *rec = &record; + if (trigger_is_ready(&auxtrace_snapshot_trigger)) { trigger_hit(&auxtrace_snapshot_trigger); auxtrace_record__snapshot_started = 1; @@ -1728,6 +1848,14 @@ static void snapshot_sig_handler(int sig __maybe_unused) trigger_error(&auxtrace_snapshot_trigger); } - if (trigger_is_ready(&switch_output_trigger)) + if (switch_output_signal(rec)) + trigger_hit(&switch_output_trigger); +} + +static void alarm_sig_handler(int sig __maybe_unused) +{ + struct record *rec = &record; + + if (switch_output_time(rec)) trigger_hit(&switch_output_trigger); } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5b134b0d1ff3..daceb3202200 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -77,6 +77,22 @@ struct sched_atom { #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" +/* task state bitmask, copied from include/linux/sched.h */ +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* in tsk->exit_state */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* in tsk->state again */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 + enum thread_state { THREAD_SLEEPING = 0, THREAD_WAIT_CPU, @@ -206,6 +222,7 @@ struct perf_sched { bool show_cpu_visual; bool show_wakeups; bool show_migrations; + bool show_state; u64 skipped_samples; const char *time_str; struct perf_time_interval ptime; @@ -216,13 +233,20 @@ struct perf_sched { struct thread_runtime { u64 last_time; /* time of previous sched in/out event */ u64 dt_run; /* run time */ - u64 dt_wait; /* time between CPU access (off cpu) */ + u64 dt_sleep; /* time between CPU access by sleep (off cpu) */ + u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ + u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ u64 dt_delay; /* time between wakeup and sched-in */ u64 ready_to_run; /* time of wakeup */ struct stats run_stats; u64 total_run_time; + u64 total_sleep_time; + u64 total_iowait_time; + u64 total_preempt_time; + u64 total_delay_time; + int last_state; u64 migrations; }; @@ -1821,6 +1845,9 @@ static void timehist_header(struct perf_sched *sched) printf(" %-*s %9s %9s %9s", comm_width, "task name", "wait time", "sch delay", "run time"); + if (sched->show_state) + printf(" %s", "state"); + printf("\n"); /* @@ -1831,9 +1858,14 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - printf(" %-*s %9s %9s %9s\n", comm_width, + printf(" %-*s %9s %9s %9s", comm_width, "[tid/pid]", "(msec)", "(msec)", "(msec)"); + if (sched->show_state) + printf(" %5s", ""); + + printf("\n"); + /* * separator */ @@ -1846,18 +1878,34 @@ static void timehist_header(struct perf_sched *sched) graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line); + if (sched->show_state) + printf(" %.5s", graph_dotted_line); + printf("\n"); } +static char task_state_char(struct thread *thread, int state) +{ + static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; + unsigned bit = state ? ffs(state) : 0; + + /* 'I' for idle */ + if (thread->tid == 0) + return 'I'; + + return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; +} + static void timehist_print_sample(struct perf_sched *sched, struct perf_sample *sample, struct addr_location *al, struct thread *thread, - u64 t) + u64 t, int state) { struct thread_runtime *tr = thread__priv(thread); u32 max_cpus = sched->max_cpu + 1; char tstr[64]; + u64 wait_time; timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); printf("%15s [%04d] ", tstr, sample->cpu); @@ -1880,10 +1928,15 @@ static void timehist_print_sample(struct perf_sched *sched, printf(" %-*s ", comm_width, timehist_get_commstr(thread)); - print_sched_time(tr->dt_wait, 6); + wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt; + print_sched_time(wait_time, 6); + print_sched_time(tr->dt_delay, 6); print_sched_time(tr->dt_run, 6); + if (sched->show_state) + printf(" %5c ", task_state_char(thread, state)); + if (sched->show_wakeups) printf(" %-*s", comm_width, ""); @@ -1930,8 +1983,11 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, u64 t, u64 tprev) { r->dt_delay = 0; - r->dt_wait = 0; + r->dt_sleep = 0; + r->dt_iowait = 0; + r->dt_preempt = 0; r->dt_run = 0; + if (tprev) { r->dt_run = t - tprev; if (r->ready_to_run) { @@ -1943,12 +1999,25 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, if (r->last_time > tprev) pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); - else if (r->last_time) - r->dt_wait = tprev - r->last_time; + else if (r->last_time) { + u64 dt_wait = tprev - r->last_time; + + if (r->last_state == TASK_RUNNING) + r->dt_preempt = dt_wait; + else if (r->last_state == TASK_UNINTERRUPTIBLE) + r->dt_iowait = dt_wait; + else + r->dt_sleep = dt_wait; + } } update_stats(&r->run_stats, r->dt_run); - r->total_run_time += r->dt_run; + + r->total_run_time += r->dt_run; + r->total_delay_time += r->dt_delay; + r->total_sleep_time += r->dt_sleep; + r->total_iowait_time += r->dt_iowait; + r->total_preempt_time += r->dt_preempt; } static bool is_idle_sample(struct perf_sample *sample, @@ -2373,6 +2442,8 @@ static int timehist_sched_change_event(struct perf_tool *tool, struct thread_runtime *tr = NULL; u64 tprev, t = sample->time; int rc = 0; + int state = perf_evsel__intval(evsel, sample, "prev_state"); + if (machine__resolve(machine, &al, sample) < 0) { pr_err("problem processing %d event. skipping it\n", @@ -2447,8 +2518,10 @@ static int timehist_sched_change_event(struct perf_tool *tool, * time. we only care total run time and run stat. */ last_tr->dt_run = 0; - last_tr->dt_wait = 0; last_tr->dt_delay = 0; + last_tr->dt_sleep = 0; + last_tr->dt_iowait = 0; + last_tr->dt_preempt = 0; if (itr->cursor.nr) callchain_append(&itr->callchain, &itr->cursor, t - tprev); @@ -2458,7 +2531,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, } if (!sched->summary_only) - timehist_print_sample(sched, sample, &al, thread, t); + timehist_print_sample(sched, sample, &al, thread, t, state); out: if (sched->hist_time.start == 0 && t >= ptime->start) @@ -2470,6 +2543,9 @@ out: /* time of this sched_switch event becomes last time task seen */ tr->last_time = sample->time; + /* last state is used to determine where to account wait time */ + tr->last_state = state; + /* sched out event for task so reset ready to run time */ tr->ready_to_run = 0; } @@ -2526,7 +2602,26 @@ static void print_thread_runtime(struct thread *t, printf("\n"); } +static void print_thread_waittime(struct thread *t, + struct thread_runtime *r) +{ + printf("%*s %5d %9" PRIu64 " ", + comm_width, timehist_get_commstr(t), t->ppid, + (u64) r->run_stats.n); + + print_sched_time(r->total_run_time, 8); + print_sched_time(r->total_sleep_time, 6); + printf(" "); + print_sched_time(r->total_iowait_time, 6); + printf(" "); + print_sched_time(r->total_preempt_time, 6); + printf(" "); + print_sched_time(r->total_delay_time, 6); + printf("\n"); +} + struct total_run_stats { + struct perf_sched *sched; u64 sched_count; u64 task_count; u64 total_run_time; @@ -2545,7 +2640,11 @@ static int __show_thread_runtime(struct thread *t, void *priv) stats->task_count++; stats->sched_count += r->run_stats.n; stats->total_run_time += r->total_run_time; - print_thread_runtime(t, r); + + if (stats->sched->show_state) + print_thread_waittime(t, r); + else + print_thread_runtime(t, r); } return 0; @@ -2633,18 +2732,24 @@ static void timehist_print_summary(struct perf_sched *sched, u64 hist_time = sched->hist_time.end - sched->hist_time.start; memset(&totals, 0, sizeof(totals)); + totals.sched = sched; if (sched->idle_hist) { printf("\nIdle-time summary\n"); printf("%*s parent sched-out ", comm_width, "comm"); printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); + } else if (sched->show_state) { + printf("\nWait-time summary\n"); + printf("%*s parent sched-in ", comm_width, "comm"); + printf(" run-time sleep iowait preempt delay\n"); } else { printf("\nRuntime summary\n"); printf("%*s parent sched-in ", comm_width, "comm"); printf(" run-time min-run avg-run max-run stddev migrations\n"); } printf("%*s (count) ", comm_width, ""); - printf(" (msec) (msec) (msec) (msec) %%\n"); + printf(" (msec) (msec) (msec) (msec) %s\n", + sched->show_state ? "(msec)" : "%"); printf("%.117s\n", graph_dotted_line); machine__for_each_thread(m, show_thread_runtime, &totals); @@ -3240,6 +3345,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), OPT_STRING(0, "time", &sched.time_str, "str", "Time span for analysis (start,stop)"), + OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), OPT_PARENT(sched_options) }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2f3ff69fc4e7..c0783b4f7b6c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2180,7 +2180,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the mmap events"), OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), - OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_BOOLEAN(0, "ns", &nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", @@ -2212,6 +2212,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) PARSE_OPT_STOP_AT_NON_OPTION); file.path = input_name; + file.force = symbol_conf.force; if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 206bf72b77fc..40ef9b293d1b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -40,6 +40,7 @@ #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */ #include <stdlib.h> +#include <string.h> #include <linux/err.h> #include <linux/filter.h> #include <linux/audit.h> @@ -2699,6 +2700,91 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) evsel->handler = handler; } +/* + * XXX: Hackish, just splitting the combined -e+--event (syscalls + * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use + * existing facilities unchanged (trace->ev_qualifier + parse_options()). + * + * It'd be better to introduce a parse_options() variant that would return a + * list with the terms it didn't match to an event... + */ +static int trace__parse_events_option(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct trace *trace = (struct trace *)opt->value; + const char *s = str; + char *sep = NULL, *lists[2] = { NULL, NULL, }; + int len = strlen(str), err = -1, list; + char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); + char group_name[PATH_MAX]; + + if (strace_groups_dir == NULL) + return -1; + + if (*s == '!') { + ++s; + trace->not_ev_qualifier = true; + } + + while (1) { + if ((sep = strchr(s, ',')) != NULL) + *sep = '\0'; + + list = 0; + if (syscalltbl__id(trace->sctbl, s) >= 0) { + list = 1; + } else { + path__join(group_name, sizeof(group_name), strace_groups_dir, s); + if (access(group_name, R_OK) == 0) + list = 1; + } + + if (lists[list]) { + sprintf(lists[list] + strlen(lists[list]), ",%s", s); + } else { + lists[list] = malloc(len); + if (lists[list] == NULL) + goto out; + strcpy(lists[list], s); + } + + if (!sep) + break; + + *sep = ','; + s = sep + 1; + } + + if (lists[1] != NULL) { + struct strlist_config slist_config = { + .dirname = strace_groups_dir, + }; + + trace->ev_qualifier = strlist__new(lists[1], &slist_config); + if (trace->ev_qualifier == NULL) { + fputs("Not enough memory to parse event qualifier", trace->output); + goto out; + } + + if (trace__validate_ev_qualifier(trace)) + goto out; + } + + err = 0; + + if (lists[0]) { + struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", + "event selector. use 'perf list' to list available events", + parse_events_option); + err = parse_events_option(&o, lists[0], 0); + } +out: + if (sep) + *sep = ','; + + return err; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { const char *trace_usage[] = { @@ -2730,15 +2816,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .max_stack = UINT_MAX, }; const char *output_name = NULL; - const char *ev_qualifier_str = NULL; const struct option trace_options[] = { - OPT_CALLBACK(0, "event", &trace.evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option), + OPT_CALLBACK('e', "event", &trace, "event", + "event/syscall selector. use 'perf list' to list available events", + trace__parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), - OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), + OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace", + trace__parse_events_option), OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", @@ -2863,7 +2949,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - if (!trace.trace_syscalls && ev_qualifier_str) { + if (!trace.trace_syscalls && trace.ev_qualifier) { pr_err("The -e option can't be used with --no-syscalls.\n"); goto out; } @@ -2878,28 +2964,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) trace.open_id = syscalltbl__id(trace.sctbl, "open"); - if (ev_qualifier_str != NULL) { - const char *s = ev_qualifier_str; - struct strlist_config slist_config = { - .dirname = system_path(STRACE_GROUPS_DIR), - }; - - trace.not_ev_qualifier = *s == '!'; - if (trace.not_ev_qualifier) - ++s; - trace.ev_qualifier = strlist__new(s, &slist_config); - if (trace.ev_qualifier == NULL) { - fputs("Not enough memory to parse event qualifier", - trace.output); - err = -ENOMEM; - goto out_close; - } - - err = trace__validate_ev_qualifier(&trace); - if (err) - goto out_close; - } - err = target__validate(&trace.opts.target); if (err) { target__strerror(&trace.opts.target, err, bf, sizeof(bf)); diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 0bcf68e98ccc..036e1e35b1a8 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -23,6 +23,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix); int cmd_evlist(int argc, const char **argv, const char *prefix); int cmd_help(int argc, const char **argv, const char *prefix); int cmd_sched(int argc, const char **argv, const char *prefix); +int cmd_kallsyms(int argc, const char **argv, const char *prefix); int cmd_list(int argc, const char **argv, const char *prefix); int cmd_record(int argc, const char **argv, const char *prefix); int cmd_report(int argc, const char **argv, const char *prefix); @@ -40,6 +41,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix); int cmd_inject(int argc, const char **argv, const char *prefix); int cmd_mem(int argc, const char **argv, const char *prefix); int cmd_data(int argc, const char **argv, const char *prefix); +int cmd_ftrace(int argc, const char **argv, const char *prefix); int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index ab5cbaa170d0..ac3efd396a72 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -11,7 +11,9 @@ perf-data mainporcelain common perf-diff mainporcelain common perf-config mainporcelain common perf-evlist mainporcelain common +perf-ftrace mainporcelain common perf-inject mainporcelain common +perf-kallsyms mainporcelain common perf-kmem mainporcelain common perf-kvm mainporcelain common perf-list mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index aa23b3347d6b..34bcf90f8871 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -29,7 +29,6 @@ const char perf_usage_string[] = const char perf_more_info_string[] = "See 'perf help COMMAND' for more information on a specific command."; -int use_browser = -1; static int use_pager = -1; const char *input_name; @@ -47,6 +46,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, + { "kallsyms", cmd_kallsyms, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, @@ -71,6 +71,7 @@ static struct cmd_struct commands[] = { { "inject", cmd_inject, 0 }, { "mem", cmd_mem, 0 }, { "data", cmd_data, 0 }, + { "ftrace", cmd_ftrace, 0 }, }; struct pager_config { @@ -329,8 +330,6 @@ static int handle_alias(int *argcp, const char ***argv) return ret; } -const char perf_version_string[] = PERF_VERSION; - #define RUN_SETUP (1<<0) #define USE_PAGER (1<<1) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 6676c2dd6dcb..1cb3d9b540e9 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -44,6 +44,7 @@ perf-y += is_printable_array.o perf-y += bitmap.o perf-y += perf-hooks.o perf-y += clang.o +perf-y += unit_number__scnprintf.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index a77dcc0d24e3..37e326bfd2dc 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -247,6 +247,10 @@ static struct test generic_tests[] = { } }, { + .desc = "unit_number__scnprintf", + .func = test__unit_number__scnprint, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 02a33ebcd992..d357dab72e68 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -13,7 +13,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) struct bpf_object *obj; obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); - if (IS_ERR(obj)) + if (libbpf_get_error(obj)) return TEST_FAIL; bpf_object__close(obj); return TEST_OK; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index a512f0c8ff5b..1fa9b9d83aa5 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -96,6 +96,7 @@ int test__perf_hooks(int subtest); int test__clang(int subtest); const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); +int test__unit_number__scnprint(int subtest); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c new file mode 100644 index 000000000000..623c2aa53c4a --- /dev/null +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -0,0 +1,37 @@ +#include <linux/compiler.h> +#include <linux/types.h> +#include "tests.h" +#include "util.h" +#include "debug.h" + +int test__unit_number__scnprint(int subtest __maybe_unused) +{ + struct { + u64 n; + const char *str; + } test[] = { + { 1, "1B" }, + { 10*1024, "10K" }, + { 20*1024*1024, "20M" }, + { 30*1024*1024*1024ULL, "30G" }, + { 0, "0B" }, + { 0, NULL }, + }; + unsigned i = 0; + + while (test[i].str) { + char buf[100]; + + unit_number__scnprintf(buf, sizeof(buf), test[i].n); + + pr_debug("n %" PRIu64 ", str '%s', buf '%s'\n", + test[i].n, test[i].str, buf); + + if (strcmp(test[i].str, buf)) + return TEST_FAIL; + + i++; + } + + return TEST_OK; +} diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 641b40234a9d..fc4fb669ceee 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -501,8 +501,8 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, return n; } -static void hist_entry__set_folding(struct hist_entry *he, - struct hist_browser *hb, bool unfold) +static void __hist_entry__set_folding(struct hist_entry *he, + struct hist_browser *hb, bool unfold) { hist_entry__init_have_children(he); he->unfolded = unfold ? he->has_children : false; @@ -520,12 +520,34 @@ static void hist_entry__set_folding(struct hist_entry *he, he->nr_rows = 0; } +static void hist_entry__set_folding(struct hist_entry *he, + struct hist_browser *browser, bool unfold) +{ + double percent; + + percent = hist_entry__get_percent_limit(he); + if (he->filtered || percent < browser->min_pcnt) + return; + + __hist_entry__set_folding(he, browser, unfold); + + if (!he->depth || unfold) + browser->nr_hierarchy_entries++; + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { + browser->nr_hierarchy_entries++; + he->has_no_entry = true; + he->nr_rows = 1; + } else + he->has_no_entry = false; +} + static void __hist_browser__set_folding(struct hist_browser *browser, bool unfold) { struct rb_node *nd; struct hist_entry *he; - double percent; nd = rb_first(&browser->hists->entries); while (nd) { @@ -535,21 +557,6 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); hist_entry__set_folding(he, browser, unfold); - - percent = hist_entry__get_percent_limit(he); - if (he->filtered || percent < browser->min_pcnt) - continue; - - if (!he->depth || unfold) - browser->nr_hierarchy_entries++; - if (he->leaf) - browser->nr_callchain_rows += he->nr_rows; - else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { - browser->nr_hierarchy_entries++; - he->has_no_entry = true; - he->nr_rows = 1; - } else - he->has_no_entry = false; } } @@ -564,6 +571,15 @@ static void hist_browser__set_folding(struct hist_browser *browser, bool unfold) ui_browser__reset_index(&browser->b); } +static void hist_browser__set_folding_selected(struct hist_browser *browser, bool unfold) +{ + if (!browser->he_selection) + return; + + hist_entry__set_folding(browser->he_selection, browser, unfold); + browser->b.nr_entries = hist_browser__nr_entries(browser); +} + static void ui_browser__warn_lost_events(struct ui_browser *browser) { ui_browser__warning(browser, 4, @@ -637,10 +653,18 @@ int hist_browser__run(struct hist_browser *browser, const char *help) /* Collapse the whole world. */ hist_browser__set_folding(browser, false); break; + case 'c': + /* Collapse the selected entry. */ + hist_browser__set_folding_selected(browser, false); + break; case 'E': /* Expand the whole world. */ hist_browser__set_folding(browser, true); break; + case 'e': + /* Expand the selected entry. */ + hist_browser__set_folding_selected(browser, true); + break; case 'H': browser->show_headers = !browser->show_headers; hist_browser__update_rows(browser); diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index 1f6b0994f4f4..50d13e58210f 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -7,6 +7,7 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; void *perf_gtk_handle; +int use_browser = -1; #ifdef HAVE_GTK2_SUPPORT static int setup_gtk_browser(void) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 3840e3a87057..5da376bc1afc 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -162,6 +162,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_parse-events.o += -Wno-redundant-decls +CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE $(call rule_mkdir) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d2c6cdd9d42b..28d41e709128 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -9,6 +9,13 @@ #include "debug.h" #include "vdso.h" +static const char * const debuglink_paths[] = { + "%.0s%s", + "%s/%s", + "%s/.debug/%s", + "/usr/lib/debug%s/%s" +}; + char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { @@ -44,24 +51,43 @@ int dso__read_binary_type_filename(const struct dso *dso, size_t len; switch (type) { - case DSO_BINARY_TYPE__DEBUGLINK: { - char *debuglink; + case DSO_BINARY_TYPE__DEBUGLINK: + { + const char *last_slash; + char dso_dir[PATH_MAX]; + char symfile[PATH_MAX]; + unsigned int i; len = __symbol__join_symfs(filename, size, dso->long_name); - debuglink = filename + len; - while (debuglink != filename && *debuglink != '/') - debuglink--; - if (*debuglink == '/') - debuglink++; + last_slash = filename + len; + while (last_slash != filename && *last_slash != '/') + last_slash--; - ret = -1; - if (!is_regular_file(filename)) + strncpy(dso_dir, filename, last_slash - filename); + dso_dir[last_slash-filename] = '\0'; + + if (!is_regular_file(filename)) { + ret = -1; + break; + } + + ret = filename__read_debuglink(filename, symfile, PATH_MAX); + if (ret) break; - ret = filename__read_debuglink(filename, debuglink, - size - (debuglink - filename)); + /* Check predefined locations where debug file might reside */ + ret = -1; + for (i = 0; i < ARRAY_SIZE(debuglink_paths); i++) { + snprintf(filename, size, + debuglink_paths[i], dso_dir, symfile); + if (is_regular_file(filename)) { + ret = 0; + break; + } } + break; + } case DSO_BINARY_TYPE__BUILD_ID_CACHE: if (dso__build_id_filename(dso, filename, size) == NULL) ret = -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d92e02006fb8..b601f2814a30 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1184,7 +1184,7 @@ unsigned long perf_event_mlock_kb_in_pages(void) return pages; } -static size_t perf_evlist__mmap_size(unsigned long pages) +size_t perf_evlist__mmap_size(unsigned long pages) { if (pages == UINT_MAX) pages = perf_event_mlock_kb_in_pages(); @@ -1224,12 +1224,16 @@ static long parse_pages_arg(const char *str, unsigned long min, if (pages == 0 && min == 0) { /* leave number of pages at 0 */ } else if (!is_power_of_2(pages)) { + char buf[100]; + /* round pages up to next power of 2 */ pages = roundup_pow_of_two(pages); if (!pages) return -EINVAL; - pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", - pages * page_size, pages); + + unit_number__scnprintf(buf, sizeof(buf), pages * page_size); + pr_info("rounding mmap pages size to %s (%lu pages)\n", + buf, pages); } if (pages > max) @@ -1797,7 +1801,7 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) */ ret = write(evlist->workload.cork_fd, &bf, 1); if (ret < 0) - perror("enable to write to pipe"); + perror("unable to write to pipe"); close(evlist->workload.cork_fd); return ret; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4fd034f22d2f..389b9ccdf8c7 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -218,6 +218,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); +size_t perf_evlist__mmap_size(unsigned long pages); + void perf_evlist__disable(struct perf_evlist *evlist); void perf_evlist__enable(struct perf_evlist *evlist); void perf_evlist__toggle_enable(struct perf_evlist *evlist); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d89c9c7ef4e5..c567d9f0aa92 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -41,6 +41,8 @@ static const u64 __perf_magic2_sw = 0x50455246494c4532ULL; #define PERF_MAGIC __perf_magic2 +const char perf_version_string[] = PERF_VERSION; + struct perf_file_attr { struct perf_event_attr attr; struct perf_file_section ids; @@ -2801,8 +2803,10 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, } event = pevent_find_event(pevent, evsel->attr.config); - if (event == NULL) + if (event == NULL) { + pr_debug("cannot find event format for %d\n", (int)evsel->attr.config); return -1; + } if (!evsel->name) { snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9b33bef54581..747a034d1ff3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -87,6 +87,25 @@ out_delete: return NULL; } +struct machine *machine__new_kallsyms(void) +{ + struct machine *machine = machine__new_host(); + /* + * FIXME: + * 1) MAP__FUNCTION will go away when we stop loading separate maps for + * functions and data objects. + * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely + * ask for not using the kcore parsing code, once this one is fixed + * to create a map per module. + */ + if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { + machine__delete(machine); + machine = NULL; + } + + return machine; +} + static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 354de6e56109..a28305029711 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -129,6 +129,7 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); +struct machine *machine__new_kallsyms(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index dc6ccaa4e927..78b16100567d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -94,32 +94,10 @@ static int pmu_format(const char *name, struct list_head *format) return 0; } -static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name) +static int convert_scale(const char *scale, char **end, double *sval) { - struct stat st; - ssize_t sret; - char scale[128]; - int fd, ret = -1; - char path[PATH_MAX]; char *lc; - - snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); - - fd = open(path, O_RDONLY); - if (fd == -1) - return -1; - - if (fstat(fd, &st) < 0) - goto error; - - sret = read(fd, scale, sizeof(scale)-1); - if (sret < 0) - goto error; - - if (scale[sret - 1] == '\n') - scale[sret - 1] = '\0'; - else - scale[sret] = '\0'; + int ret = 0; /* * save current locale @@ -134,7 +112,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * lc = strdup(lc); if (!lc) { ret = -ENOMEM; - goto error; + goto out; } /* @@ -144,14 +122,42 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * */ setlocale(LC_NUMERIC, "C"); - alias->scale = strtod(scale, NULL); + *sval = strtod(scale, end); +out: /* restore locale */ setlocale(LC_NUMERIC, lc); - free(lc); + return ret; +} + +static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name) +{ + struct stat st; + ssize_t sret; + char scale[128]; + int fd, ret = -1; + char path[PATH_MAX]; + + snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + if (fstat(fd, &st) < 0) + goto error; + + sret = read(fd, scale, sizeof(scale)-1); + if (sret < 0) + goto error; + + if (scale[sret - 1] == '\n') + scale[sret - 1] = '\0'; + else + scale[sret] = '\0'; - ret = 0; + ret = convert_scale(scale, NULL, &alias->scale); error: close(fd); return ret; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6a6f44dd594b..2c1bca240597 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3023,20 +3023,17 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev, tev->nargs = pev->nargs; tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); - if (!tev->args) { - err = -ENOMEM; + if (!tev->args) goto errout; - } + for (i = 0; i < tev->nargs; i++) copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]); return 1; errout: - if (*tevs) { - clear_probe_trace_events(*tevs, 1); - *tevs = NULL; - } + clear_probe_trace_events(*tevs, 1); + *tevs = NULL; return err; } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index e74adfbd6a2e..ecabc26ad1ee 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -351,8 +351,10 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (evsel->attr.type != PERF_TYPE_TRACEPOINT) return; - if (!event) - die("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); + if (!event) { + pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); + return; + } pid = raw_field_value(event, "common_pid", data); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f268201048a0..349c68144e55 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1191,7 +1191,7 @@ static int u64 sample_type = evsel->attr.sample_type; u64 read_format = evsel->attr.read_format; - /* Standard sample delievery. */ + /* Standard sample delivery. */ if (!(sample_type & PERF_SAMPLE_READ)) return tool->sample(tool, event, sample, evsel, machine); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index d995743cb673..ceb0e2720223 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -42,7 +42,7 @@ #include "evsel.h" #include "debug.h" -#define VERSION "0.5" +#define VERSION "0.6" static int output_fd; @@ -379,6 +379,34 @@ out: return err; } +static int record_saved_cmdline(void) +{ + unsigned int size; + char *path; + struct stat st; + int ret, err = 0; + + path = get_tracing_file("saved_cmdlines"); + if (!path) { + pr_debug("can't get tracing/saved_cmdline"); + return -ENOMEM; + } + + ret = stat(path, &st); + if (ret < 0) { + /* not found */ + size = 0; + if (write(output_fd, &size, 8) != 8) + err = -EIO; + goto out; + } + err = record_file(path, 8); + +out: + put_tracing_file(path); + return err; +} + static void put_tracepoints_path(struct tracepoint_path *tps) { @@ -539,6 +567,9 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, if (err) goto out; err = record_ftrace_printk(); + if (err) + goto out; + err = record_saved_cmdline(); out: /* diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 33b52eaa39db..de0078e21408 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -160,6 +160,23 @@ void parse_ftrace_printk(struct pevent *pevent, } } +void parse_saved_cmdline(struct pevent *pevent, + char *file, unsigned int size __maybe_unused) +{ + char *comm; + char *line; + char *next = NULL; + int pid; + + line = strtok_r(file, "\n", &next); + while (line) { + sscanf(line, "%d %ms", &pid, &comm); + pevent_register_comm(pevent, comm, pid); + free(comm); + line = strtok_r(NULL, "\n", &next); + } +} + int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size) { return pevent_parse_event(pevent, buf, size, "ftrace"); diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index b67a0ccf5ab9..27420159bf69 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -260,39 +260,53 @@ static int read_header_files(struct pevent *pevent) static int read_ftrace_file(struct pevent *pevent, unsigned long long size) { + int ret; char *buf; buf = malloc(size); - if (buf == NULL) + if (buf == NULL) { + pr_debug("memory allocation failure\n"); return -1; + } - if (do_read(buf, size) < 0) { - free(buf); - return -1; + ret = do_read(buf, size); + if (ret < 0) { + pr_debug("error reading ftrace file.\n"); + goto out; } - parse_ftrace_file(pevent, buf, size); + ret = parse_ftrace_file(pevent, buf, size); + if (ret < 0) + pr_debug("error parsing ftrace file.\n"); +out: free(buf); - return 0; + return ret; } static int read_event_file(struct pevent *pevent, char *sys, unsigned long long size) { + int ret; char *buf; buf = malloc(size); - if (buf == NULL) + if (buf == NULL) { + pr_debug("memory allocation failure\n"); return -1; + } - if (do_read(buf, size) < 0) { + ret = do_read(buf, size); + if (ret < 0) { free(buf); - return -1; + goto out; } - parse_event_file(pevent, buf, size, sys); + ret = parse_event_file(pevent, buf, size, sys); + if (ret < 0) + pr_debug("error parsing event file.\n"); +out: free(buf); - return 0; + return ret; } static int read_ftrace_files(struct pevent *pevent) @@ -341,6 +355,36 @@ static int read_event_files(struct pevent *pevent) return 0; } +static int read_saved_cmdline(struct pevent *pevent) +{ + unsigned long long size; + char *buf; + int ret; + + /* it can have 0 size */ + size = read8(pevent); + if (!size) + return 0; + + buf = malloc(size + 1); + if (buf == NULL) { + pr_debug("memory allocation failure\n"); + return -1; + } + + ret = do_read(buf, size); + if (ret < 0) { + pr_debug("error reading saved cmdlines\n"); + goto out; + } + + parse_saved_cmdline(pevent, buf, size); + ret = 0; +out: + free(buf); + return ret; +} + ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) { char buf[BUFSIZ]; @@ -379,10 +423,11 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) return -1; if (show_version) printf("version = %s\n", version); - free(version); - if (do_read(buf, 1) < 0) + if (do_read(buf, 1) < 0) { + free(version); return -1; + } file_bigendian = buf[0]; host_bigendian = bigendian(); @@ -423,6 +468,11 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) err = read_ftrace_printk(pevent); if (err) goto out; + if (atof(version) >= 0.6) { + err = read_saved_cmdline(pevent); + if (err) + goto out; + } size = trace_data_size; repipe = false; @@ -438,5 +488,6 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) out: if (pevent) trace_event__cleanup(tevent); + free(version); return size; } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index b0af9c81bb0d..1fbc044f9eb0 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -42,6 +42,7 @@ raw_field_value(struct event_format *event, const char *name, void *data); void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size); void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size); +void parse_saved_cmdline(struct pevent *pevent, char *file, unsigned int size); ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe); diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 6fec84dff3f7..bfb9b7987692 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -35,6 +35,7 @@ #include "util.h" #include "debug.h" #include "asm/bug.h" +#include "dso.h" extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, @@ -297,15 +298,58 @@ static int read_unwind_spec_debug_frame(struct dso *dso, int fd; u64 ofs = dso->data.debug_frame_offset; + /* debug_frame can reside in: + * - dso + * - debug pointed by symsrc_filename + * - gnu_debuglink, which doesn't necessary + * has to be pointed by symsrc_filename + */ if (ofs == 0) { fd = dso__data_get_fd(dso, machine); - if (fd < 0) - return -EINVAL; + if (fd >= 0) { + ofs = elf_section_offset(fd, ".debug_frame"); + dso__data_put_fd(dso); + } + + if (ofs <= 0) { + fd = open(dso->symsrc_filename, O_RDONLY); + if (fd >= 0) { + ofs = elf_section_offset(fd, ".debug_frame"); + close(fd); + } + } + + if (ofs <= 0) { + char *debuglink = malloc(PATH_MAX); + int ret = 0; + + ret = dso__read_binary_type_filename( + dso, DSO_BINARY_TYPE__DEBUGLINK, + machine->root_dir, debuglink, PATH_MAX); + if (!ret) { + fd = open(debuglink, O_RDONLY); + if (fd >= 0) { + ofs = elf_section_offset(fd, + ".debug_frame"); + close(fd); + } + } + if (ofs > 0) { + if (dso->symsrc_filename != NULL) { + pr_warning( + "%s: overwrite symsrc(%s,%s)\n", + __func__, + dso->symsrc_filename, + debuglink); + free(dso->symsrc_filename); + } + dso->symsrc_filename = debuglink; + } else { + free(debuglink); + } + } - /* Check the .debug_frame section for unwinding info */ - ofs = elf_section_offset(fd, ".debug_frame"); dso->data.debug_frame_offset = ofs; - dso__data_put_fd(dso); } *offset = ofs; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9ddd98827d12..bf29aed16bd6 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -789,3 +789,16 @@ int is_printable_array(char *p, unsigned int len) } return 1; } + +int unit_number__scnprintf(char *buf, size_t size, u64 n) +{ + char unit[4] = "BKMG"; + int i = 0; + + while (((n / 1024) > 1) && (i < 3)) { + n /= 1024; + i++; + } + + return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 1d639e38aa82..6e8be174ec0b 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -363,4 +363,5 @@ int is_printable_array(char *p, unsigned int len); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); +int unit_number__scnprintf(char *buf, size_t size, u64 n); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh new file mode 100644 index 000000000000..6905da965f3b --- /dev/null +++ b/tools/testing/selftests/locking/ww_mutex.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs API tests for struct ww_mutex (Wait/Wound mutexes) + +if /sbin/modprobe -q test-ww_mutex; then + /sbin/modprobe -q -r test-ww_mutex + echo "locking/ww_mutex: ok" +else + echo "locking/ww_mutex: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST index b9611c523723..41bae5824339 100644 --- a/tools/testing/selftests/rcutorture/configs/lock/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST @@ -4,3 +4,4 @@ LOCK03 LOCK04 LOCK05 LOCK06 +LOCK07 diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07 b/tools/testing/selftests/rcutorture/configs/lock/LOCK07 new file mode 100644 index 000000000000..1d1da1477fc3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07 @@ -0,0 +1,6 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot new file mode 100644 index 000000000000..97dadd1a9e45 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot @@ -0,0 +1 @@ +locktorture.torture_type=ww_mutex_lock diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 8c1cb423cfe6..25d4067c11e4 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -10,7 +10,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c new file mode 100644 index 000000000000..d85ec5b3671c --- /dev/null +++ b/tools/testing/selftests/x86/sysret_rip.c @@ -0,0 +1,195 @@ +/* + * sigreturn.c - tests that x86 avoids Intel SYSRET pitfalls + * Copyright (c) 2014-2016 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <sys/syscall.h> +#include <err.h> +#include <stddef.h> +#include <stdbool.h> +#include <setjmp.h> +#include <sys/user.h> +#include <sys/mman.h> +#include <assert.h> + + +asm ( + ".pushsection \".text\", \"ax\"\n\t" + ".balign 4096\n\t" + "test_page: .globl test_page\n\t" + ".fill 4094,1,0xcc\n\t" + "test_syscall_insn:\n\t" + "syscall\n\t" + ".ifne . - test_page - 4096\n\t" + ".error \"test page is not one page long\"\n\t" + ".endif\n\t" + ".popsection" + ); + +extern const char test_page[]; +static void const *current_test_page_addr = test_page; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +/* State used by our signal handlers. */ +static gregset_t initial_regs; + +static volatile unsigned long rip; + +static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (rip != ctx->uc_mcontext.gregs[REG_RIP]) { + printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n", + rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]); + fflush(stdout); + _exit(1); + } + + memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); + + printf("[OK]\tGot SIGSEGV at RIP=0x%lx\n", rip); +} + +static void sigusr1(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + + /* Set IP and CX to match so that SYSRET can happen. */ + ctx->uc_mcontext.gregs[REG_RIP] = rip; + ctx->uc_mcontext.gregs[REG_RCX] = rip; + + /* R11 and EFLAGS should already match. */ + assert(ctx->uc_mcontext.gregs[REG_EFL] == + ctx->uc_mcontext.gregs[REG_R11]); + + sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND); + + return; +} + +static void test_sigreturn_to(unsigned long ip) +{ + rip = ip; + printf("[RUN]\tsigreturn to 0x%lx\n", ip); + raise(SIGUSR1); +} + +static jmp_buf jmpbuf; + +static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (rip != ctx->uc_mcontext.gregs[REG_RIP]) { + printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n", + rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]); + fflush(stdout); + _exit(1); + } + + siglongjmp(jmpbuf, 1); +} + +static void test_syscall_fallthrough_to(unsigned long ip) +{ + void *new_address = (void *)(ip - 4096); + void *ret; + + printf("[RUN]\tTrying a SYSCALL that falls through to 0x%lx\n", ip); + + ret = mremap((void *)current_test_page_addr, 4096, 4096, + MREMAP_MAYMOVE | MREMAP_FIXED, new_address); + if (ret == MAP_FAILED) { + if (ip <= (1UL << 47) - PAGE_SIZE) { + err(1, "mremap to %p", new_address); + } else { + printf("[OK]\tmremap to %p failed\n", new_address); + return; + } + } + + if (ret != new_address) + errx(1, "mremap malfunctioned: asked for %p but got %p\n", + new_address, ret); + + current_test_page_addr = new_address; + rip = ip; + + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ("call *%[syscall_insn]" :: "a" (SYS_getpid), + [syscall_insn] "rm" (ip - 2)); + errx(1, "[FAIL]\tSyscall trampoline returned"); + } + + printf("[OK]\tWe survived\n"); +} + +int main() +{ + /* + * When the kernel returns from a slow-path syscall, it will + * detect whether SYSRET is appropriate. If it incorrectly + * thinks that SYSRET is appropriate when RIP is noncanonical, + * it'll crash on Intel CPUs. + */ + sethandler(SIGUSR1, sigusr1, 0); + for (int i = 47; i < 64; i++) + test_sigreturn_to(1UL<<i); + + clearhandler(SIGUSR1); + + sethandler(SIGSEGV, sigsegv_for_fallthrough, 0); + + /* One extra test to check that we didn't screw up the mremap logic. */ + test_syscall_fallthrough_to((1UL << 47) - 2*PAGE_SIZE); + + /* These are the interesting cases. */ + for (int i = 47; i < 64; i++) { + test_syscall_fallthrough_to((1UL<<i) - PAGE_SIZE); + test_syscall_fallthrough_to(1UL<<i); + } + + return 0; +} |