From 11246c708acdfa9512d7b69c18938810c20fd6ab Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Oct 2014 17:29:02 -0300 Subject: perf tools: Set thread->mg.machine in all places We were setting this only in machine__init(), i.e. for the map_groups that holds the kernel module maps, not for the one used for a thread's executable mmaps. Now we are sure that we can obtain the machine where a thread is by going via thread->mg->machine, thus we can, in the following patch, make all codepaths that receive machine _and_ thread, drop the machine one. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-y6zgaqsvhrf04v57u15e4ybm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index c41411726c7a..8db9626f6835 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -15,7 +15,7 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine) pid_t pid = thread->pid_; if (pid == thread->tid || pid == -1) { - thread->mg = map_groups__new(); + thread->mg = map_groups__new(machine); } else { leader = machine__findnew_thread(machine, pid, pid); if (leader) -- cgit v1.2.3 From bb871a9c8d68692ed2513b3f0e1c010c2ac12f44 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Oct 2014 12:50:25 -0300 Subject: perf tools: A thread's machine can be found via thread->mg->machine So stop passing both machine and thread to several thread methods, reducing function signature length. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ckcy19dcp1jfkmdihdjcqdn1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 5 ++-- tools/perf/builtin-inject.c | 3 +- tools/perf/builtin-script.c | 7 ++--- tools/perf/builtin-timechart.c | 2 +- tools/perf/builtin-trace.c | 6 ++-- tools/perf/tests/code-reading.c | 3 +- tools/perf/tests/mmap-thread-lookup.c | 2 +- tools/perf/util/build-id.c | 3 +- tools/perf/util/callchain.h | 6 ++-- tools/perf/util/event.c | 19 ++++++------ tools/perf/util/event.h | 1 - tools/perf/util/machine.c | 35 +++++++++-------------- tools/perf/util/thread.c | 4 +-- tools/perf/util/thread.h | 5 ++-- tools/perf/util/unwind-libdw.c | 4 +-- tools/perf/util/unwind-libunwind.c | 13 ++++----- 16 files changed, 48 insertions(+), 70 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 9892b0f0bec4..3bb50eac5542 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -232,8 +232,7 @@ out: * index: of callchain entry that needs to be ignored (if any) * -1 if no entry needs to be ignored or in case of errors */ -int arch_skip_callchain_idx(struct machine *machine, struct thread *thread, - struct ip_callchain *chain) +int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) { struct addr_location al; struct dso *dso = NULL; @@ -246,7 +245,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread, ip = chain->ips[2]; - thread__find_addr_location(thread, machine, PERF_RECORD_MISC_USER, + thread__find_addr_location(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); if (al.map) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index de99ca1bb942..06f1758951f1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -217,8 +217,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, goto repipe; } - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->ip, &al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) { if (!al.map->dso->hit) { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9708a1290571..b35517f2ceb5 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -379,7 +379,6 @@ static void print_sample_start(struct perf_sample *sample, static void print_sample_addr(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct thread *thread, struct perf_event_attr *attr) { @@ -390,7 +389,7 @@ static void print_sample_addr(union perf_event *event, if (!sample_addr_correlates_sym(attr)) return; - perf_event__preprocess_sample_addr(event, sample, machine, thread, &al); + perf_event__preprocess_sample_addr(event, sample, thread, &al); if (PRINT_FIELD(SYM)) { printf(" "); @@ -438,7 +437,7 @@ static void print_sample_bts(union perf_event *event, ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && !output[attr->type].user_set)) { printf(" => "); - print_sample_addr(event, sample, al->machine, thread, attr); + print_sample_addr(event, sample, thread, attr); } if (print_srcline_last) @@ -475,7 +474,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, event_format__print(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) - print_sample_addr(event, sample, al->machine, thread, attr); + print_sample_addr(event, sample, thread, attr); if (PRINT_FIELD(IP)) { if (!symbol_conf.use_callchain) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 35b425b6293f..f5fb256d90d5 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -528,7 +528,7 @@ static const char *cat_backtrace(union perf_event *event, } tal.filtered = 0; - thread__find_addr_location(al.thread, machine, cpumode, + thread__find_addr_location(al.thread, cpumode, MAP__FUNCTION, ip, &tal); if (tal.sym) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fb126459b134..83a4835c8118 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1846,7 +1846,7 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) return 0; - thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION, + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, sample->ip, &al); trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); @@ -1859,11 +1859,11 @@ static int trace__pgfault(struct trace *trace, fprintf(trace->output, "] => "); - thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE, + thread__find_addr_location(thread, cpumode, MAP__VARIABLE, sample->addr, &al); if (!al.map) { - thread__find_addr_location(thread, trace->host, cpumode, + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, sample->addr, &al); if (al.map) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 67f2d6323558..144a41236456 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -145,8 +145,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr, - &al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); if (!al.map || !al.map->dso) { pr_debug("thread__find_addr_map failed\n"); return -1; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 4a456fef66ca..2113f1c8611f 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -187,7 +187,7 @@ static int mmap_events(synth_cb synth) pr_debug("looking for map %p\n", td->map); - thread__find_addr_map(thread, machine, + thread__find_addr_map(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, (unsigned long) (td->map + 1), &al); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a904a4cfe7d3..2e7c68e39330 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -33,8 +33,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->ip, &al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) al.map->dso->hit = 1; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 94cfefddf4db..3caccc2c173c 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -184,11 +184,9 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, } #ifdef HAVE_SKIP_CALLCHAIN_IDX -extern int arch_skip_callchain_idx(struct machine *machine, - struct thread *thread, struct ip_callchain *chain); +extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); #else -static inline int arch_skip_callchain_idx(struct machine *machine __maybe_unused, - struct thread *thread __maybe_unused, +static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, struct ip_callchain *chain __maybe_unused) { return -1; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4af6b279e34a..e00a29fb099f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -730,12 +730,12 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, return machine__process_event(machine, event, sample); } -void thread__find_addr_map(struct thread *thread, - struct machine *machine, u8 cpumode, +void thread__find_addr_map(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al) { struct map_groups *mg = thread->mg; + struct machine *machine = mg->machine; bool load_map = false; al->machine = machine; @@ -806,14 +806,14 @@ try_again: } } -void thread__find_addr_location(struct thread *thread, struct machine *machine, +void thread__find_addr_location(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al) { - thread__find_addr_map(thread, machine, cpumode, type, addr, al); + thread__find_addr_map(thread, cpumode, type, addr, al); if (al->map != NULL) al->sym = map__find_symbol(al->map, al->addr, - machine->symbol_filter); + thread->mg->machine->symbol_filter); else al->sym = NULL; } @@ -842,8 +842,7 @@ int perf_event__preprocess_sample(const union perf_event *event, machine->vmlinux_maps[MAP__FUNCTION] == NULL) machine__create_kernel_maps(machine); - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->ip, al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : ""); @@ -902,16 +901,14 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) void perf_event__preprocess_sample_addr(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct thread *thread, struct addr_location *al) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->addr, al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al); if (!al->map) - thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE, + thread__find_addr_map(thread, cpumode, MAP__VARIABLE, sample->addr, al); al->cpu = sample->cpu; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 5699e7e2a790..5f0e0b89e130 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -322,7 +322,6 @@ bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); void perf_event__preprocess_sample_addr(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct thread *thread, struct addr_location *al); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c70b3ff7b289..08e63fdbd14f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1289,7 +1289,7 @@ static bool symbol__match_regex(struct symbol *sym, regex_t *regex) return 0; } -static void ip__resolve_ams(struct machine *machine, struct thread *thread, +static void ip__resolve_ams(struct thread *thread, struct addr_map_symbol *ams, u64 ip) { @@ -1303,7 +1303,7 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, machine, MAP__FUNCTION, ip, &al); + thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -1311,23 +1311,21 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread, ams->map = al.map; } -static void ip__resolve_data(struct machine *machine, struct thread *thread, +static void ip__resolve_data(struct thread *thread, u8 m, struct addr_map_symbol *ams, u64 addr) { struct addr_location al; memset(&al, 0, sizeof(al)); - thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr, - &al); + thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al); if (al.map == NULL) { /* * some shared data regions have execute bit set which puts * their mapping in the MAP__FUNCTION type array. * Check there as a fallback option before dropping the sample. */ - thread__find_addr_location(thread, machine, m, MAP__FUNCTION, addr, - &al); + thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al); } ams->addr = addr; @@ -1344,9 +1342,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, if (!mi) return NULL; - ip__resolve_ams(al->machine, al->thread, &mi->iaddr, sample->ip); - ip__resolve_data(al->machine, al->thread, al->cpumode, - &mi->daddr, sample->addr); + ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); + ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr); mi->data_src.val = sample->data_src; return mi; @@ -1363,15 +1360,14 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, return NULL; for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(al->machine, al->thread, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(al->machine, al->thread, &bi[i].from, bs->entries[i].from); + ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); bi[i].flags = bs->entries[i].flags; } return bi; } -static int machine__resolve_callchain_sample(struct machine *machine, - struct thread *thread, +static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain, struct symbol **parent, struct addr_location *root_al, @@ -1395,7 +1391,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - skip_idx = arch_skip_callchain_idx(machine, thread, chain); + skip_idx = arch_skip_callchain_idx(thread, chain); for (i = 0; i < chain_nr; i++) { u64 ip; @@ -1437,7 +1433,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, } al.filtered = 0; - thread__find_addr_location(thread, machine, cpumode, + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, ip, &al); if (al.sym != NULL) { if (sort__has_parent && !*parent && @@ -1476,11 +1472,8 @@ int machine__resolve_callchain(struct machine *machine, struct addr_location *root_al, int max_stack) { - int ret; - - ret = machine__resolve_callchain_sample(machine, thread, - sample->callchain, parent, - root_al, max_stack); + int ret = thread__resolve_callchain_sample(thread, sample->callchain, + parent, root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8db9626f6835..bf5bf858b7f6 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -198,7 +198,6 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } void thread__find_cpumode_addr_location(struct thread *thread, - struct machine *machine, enum map_type type, u64 addr, struct addr_location *al) { @@ -211,8 +210,7 @@ void thread__find_cpumode_addr_location(struct thread *thread, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_addr_location(thread, machine, cpumodes[i], type, - addr, al); + thread__find_addr_location(thread, cpumodes[i], type, addr, al); if (al->map) break; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 8c75fa774706..6ef9fe6ff8da 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -54,16 +54,15 @@ void thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); -void thread__find_addr_map(struct thread *thread, struct machine *machine, +void thread__find_addr_map(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); -void thread__find_addr_location(struct thread *thread, struct machine *machine, +void thread__find_addr_location(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, - struct machine *machine, enum map_type type, u64 addr, struct addr_location *al); diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 7419768c38b1..f24b350ab192 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -26,7 +26,7 @@ static int __report_module(struct addr_location *al, u64 ip, Dwfl_Module *mod; struct dso *dso = NULL; - thread__find_addr_location(ui->thread, ui->machine, + thread__find_addr_location(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, al); @@ -89,7 +89,7 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr, struct addr_location al; ssize_t size; - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, addr, &al); if (!al.map) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 4d45c0dfe343..29acc8cccb56 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -284,7 +284,7 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui) { struct addr_location al; - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); return al.map; } @@ -374,7 +374,7 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, struct addr_location al; ssize_t size; - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, addr, &al); if (!al.map) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); @@ -476,14 +476,13 @@ static void put_unwind_info(unw_addr_space_t __maybe_unused as, pr_debug("unwind: put_unwind_info called\n"); } -static int entry(u64 ip, struct thread *thread, struct machine *machine, +static int entry(u64 ip, struct thread *thread, unwind_entry_cb_t cb, void *arg) { struct unwind_entry e; struct addr_location al; - thread__find_addr_location(thread, machine, - PERF_RECORD_MISC_USER, + thread__find_addr_location(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); e.ip = ip; @@ -586,7 +585,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, unw_word_t ip; unw_get_reg(&c, UNW_REG_IP, &ip); - ret = ip ? entry(ip, ui->thread, ui->machine, cb, arg) : 0; + ret = ip ? entry(ip, ui->thread, cb, arg) : 0; } return ret; @@ -611,7 +610,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (ret) return ret; - ret = entry(ip, thread, machine, cb, arg); + ret = entry(ip, thread, cb, arg); if (ret) return -ENOMEM; -- cgit v1.2.3 From 00447ccdf3335ea467841fc3c7d65ffd30748895 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:42 +0200 Subject: perf tools: Add a thread stack for synthesizing call chains Add a thread stack for synthesizing call chains from call and return events. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 + tools/perf/util/event.h | 26 +++++++ tools/perf/util/thread-stack.c | 172 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/thread-stack.h | 32 ++++++++ tools/perf/util/thread.c | 3 + tools/perf/util/thread.h | 3 + 6 files changed, 238 insertions(+) create mode 100644 tools/perf/util/thread-stack.c create mode 100644 tools/perf/util/thread-stack.h (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3caf7dab50e8..0ebcc4ad0244 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -317,6 +317,7 @@ LIB_H += ui/util.h LIB_H += ui/ui.h LIB_H += util/data.h LIB_H += util/kvm-stat.h +LIB_H += util/thread-stack.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -394,6 +395,7 @@ LIB_OBJS += $(OUTPUT)util/srcline.o LIB_OBJS += $(OUTPUT)util/data.o LIB_OBJS += $(OUTPUT)util/tsc.o LIB_OBJS += $(OUTPUT)util/cloexec.o +LIB_OBJS += $(OUTPUT)util/thread-stack.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8c7fe9d64e79..7be389735402 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -143,6 +143,32 @@ struct branch_stack { struct branch_entry entries[0]; }; +enum { + PERF_IP_FLAG_BRANCH = 1ULL << 0, + PERF_IP_FLAG_CALL = 1ULL << 1, + PERF_IP_FLAG_RETURN = 1ULL << 2, + PERF_IP_FLAG_CONDITIONAL = 1ULL << 3, + PERF_IP_FLAG_SYSCALLRET = 1ULL << 4, + PERF_IP_FLAG_ASYNC = 1ULL << 5, + PERF_IP_FLAG_INTERRUPT = 1ULL << 6, + PERF_IP_FLAG_TX_ABORT = 1ULL << 7, + PERF_IP_FLAG_TRACE_BEGIN = 1ULL << 8, + PERF_IP_FLAG_TRACE_END = 1ULL << 9, + PERF_IP_FLAG_IN_TX = 1ULL << 10, +}; + +#define PERF_BRANCH_MASK (\ + PERF_IP_FLAG_BRANCH |\ + PERF_IP_FLAG_CALL |\ + PERF_IP_FLAG_RETURN |\ + PERF_IP_FLAG_CONDITIONAL |\ + PERF_IP_FLAG_SYSCALLRET |\ + PERF_IP_FLAG_ASYNC |\ + PERF_IP_FLAG_INTERRUPT |\ + PERF_IP_FLAG_TX_ABORT |\ + PERF_IP_FLAG_TRACE_BEGIN |\ + PERF_IP_FLAG_TRACE_END) + struct perf_sample { u64 ip; u32 pid, tid; diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c new file mode 100644 index 000000000000..85b60d2e738f --- /dev/null +++ b/tools/perf/util/thread-stack.c @@ -0,0 +1,172 @@ +/* + * thread-stack.c: Synthesize a thread's stack using call / return events + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include "thread.h" +#include "event.h" +#include "util.h" +#include "debug.h" +#include "thread-stack.h" + +#define STACK_GROWTH 4096 + +struct thread_stack_entry { + u64 ret_addr; +}; + +struct thread_stack { + struct thread_stack_entry *stack; + size_t cnt; + size_t sz; + u64 trace_nr; +}; + +static int thread_stack__grow(struct thread_stack *ts) +{ + struct thread_stack_entry *new_stack; + size_t sz, new_sz; + + new_sz = ts->sz + STACK_GROWTH; + sz = new_sz * sizeof(struct thread_stack_entry); + + new_stack = realloc(ts->stack, sz); + if (!new_stack) + return -ENOMEM; + + ts->stack = new_stack; + ts->sz = new_sz; + + return 0; +} + +static struct thread_stack *thread_stack__new(void) +{ + struct thread_stack *ts; + + ts = zalloc(sizeof(struct thread_stack)); + if (!ts) + return NULL; + + if (thread_stack__grow(ts)) { + free(ts); + return NULL; + } + + return ts; +} + +static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) +{ + int err = 0; + + if (ts->cnt == ts->sz) { + err = thread_stack__grow(ts); + if (err) { + pr_warning("Out of memory: discarding thread stack\n"); + ts->cnt = 0; + } + } + + ts->stack[ts->cnt++].ret_addr = ret_addr; + + return err; +} + +static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) +{ + size_t i; + + /* + * In some cases there may be functions which are not seen to return. + * For example when setjmp / longjmp has been used. Or the perf context + * switch in the kernel which doesn't stop and start tracing in exactly + * the same code path. When that happens the return address will be + * further down the stack. If the return address is not found at all, + * we assume the opposite (i.e. this is a return for a call that wasn't + * seen for some reason) and leave the stack alone. + */ + for (i = ts->cnt; i; ) { + if (ts->stack[--i].ret_addr == ret_addr) { + ts->cnt = i; + return; + } + } +} + +int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, + u64 to_ip, u16 insn_len, u64 trace_nr) +{ + if (!thread) + return -EINVAL; + + if (!thread->ts) { + thread->ts = thread_stack__new(); + if (!thread->ts) { + pr_warning("Out of memory: no thread stack\n"); + return -ENOMEM; + } + thread->ts->trace_nr = trace_nr; + } + + /* + * When the trace is discontinuous, the trace_nr changes. In that case + * the stack might be completely invalid. Better to report nothing than + * to report something misleading, so reset the stack count to zero. + */ + if (trace_nr != thread->ts->trace_nr) { + thread->ts->trace_nr = trace_nr; + thread->ts->cnt = 0; + } + + if (flags & PERF_IP_FLAG_CALL) { + u64 ret_addr; + + if (!to_ip) + return 0; + ret_addr = from_ip + insn_len; + if (ret_addr == to_ip) + return 0; /* Zero-length calls are excluded */ + return thread_stack__push(thread->ts, ret_addr); + } else if (flags & PERF_IP_FLAG_RETURN) { + if (!from_ip) + return 0; + thread_stack__pop(thread->ts, to_ip); + } + + return 0; +} + +void thread_stack__free(struct thread *thread) +{ + if (thread->ts) { + zfree(&thread->ts->stack); + zfree(&thread->ts); + } +} + +void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, + size_t sz, u64 ip) +{ + size_t i; + + if (!thread || !thread->ts) + chain->nr = 1; + else + chain->nr = min(sz, thread->ts->cnt + 1); + + chain->ips[0] = ip; + + for (i = 1; i < chain->nr; i++) + chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; +} diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h new file mode 100644 index 000000000000..7c41579aec74 --- /dev/null +++ b/tools/perf/util/thread-stack.h @@ -0,0 +1,32 @@ +/* + * thread-stack.h: Synthesize a thread's stack using call / return events + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_THREAD_STACK_H +#define __PERF_THREAD_STACK_H + +#include + +#include + +struct thread; +struct ip_callchain; + +int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, + u64 to_ip, u16 insn_len, u64 trace_nr); +void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, + size_t sz, u64 ip); +void thread_stack__free(struct thread *thread); + +#endif diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index bf5bf858b7f6..a2157f0ef1df 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -4,6 +4,7 @@ #include #include "session.h" #include "thread.h" +#include "thread-stack.h" #include "util.h" #include "debug.h" #include "comm.h" @@ -66,6 +67,8 @@ void thread__delete(struct thread *thread) { struct comm *comm, *tmp; + thread_stack__free(thread); + if (thread->mg) { map_groups__put(thread->mg); thread->mg = NULL; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index d34cf5c0d0d9..160fd066a7d1 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -8,6 +8,8 @@ #include "symbol.h" #include +struct thread_stack; + struct thread { union { struct rb_node rb_node; @@ -26,6 +28,7 @@ struct thread { u64 db_id; void *priv; + struct thread_stack *ts; }; struct machine; -- cgit v1.2.3 From a84808083688d82d7f1e5786ccf5df0ff7d448cb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Nov 2014 16:16:41 +0200 Subject: perf tools: Only override the default :tid comm entry Events may still be ordered even if there are no timestamps e.g. if the data is recorded per-thread. Also synthesized COMM events have a timestamp of zero. Consequently it is better to keep comm entries even if they have a timestamp of zero. However, when a struct thread is created the command string is not known and a comm entry with a string of the form ":" is used. In that case thread->comm_set is false and the comm entry should be overridden. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415715423-15563-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/perf/util/thread.c') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index a2157f0ef1df..9ebc8b1f9be5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -103,15 +103,14 @@ struct comm *thread__exec_comm(const struct thread *thread) return last; } -/* CHECKME: time should always be 0 if event aren't ordered */ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, bool exec) { struct comm *new, *curr = thread__comm(thread); int err; - /* Override latest entry if it had no specific time coverage */ - if (!curr->start && !curr->exec) { + /* Override the default :tid entry */ + if (!thread->comm_set) { err = comm__override(curr, str, timestamp, exec); if (err) return err; -- cgit v1.2.3