author	Ingo Molnar <mingo@kernel.org>	2018-12-18 14:39:00 +0100
committer	Ingo Molnar <mingo@kernel.org>	2018-12-18 14:39:00 +0100
commit	ca46afdb2754dbb4a5d5772332fa16957d9bc618 (patch)
tree	7c57056770c8a1621555b58d2e52625955376cfa /tools/perf/builtin-trace.c
parent	8162b3d1a728cf63abf54be4167dd9beec5d9d37 (diff)
parent	028713aa8389d960cb1935a9954327bdaa163cf8 (diff)
Merge tag 'perf-core-for-mingo-4.21-20181217' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Introduce 'perf record --aio' to use asynchronous IO trace writing, disabled by default (Alexey Budankov)

- Add fallback routines for the places where we don't have the CPU mode (kernel/userspace/hypervisor) and thus must fall back to looking at all map trees when trying to resolve symbols (Adrian Hunter)

- Fix error with config term "pt=0", where we should just force "pt=1" and warn the user about the former being nonsensical (Adrian Hunter)

- Fix 'perf test' entry where we expect 'sleep' to come in a PERF_RECORD_COMM but instead we get 'coreutils' when sleep is provided by some versions of the 'coreutils' package (Adrian Hunter)

- Introduce 'perf top --kallsyms file' to match 'perf report --kallsyms', useful when dealing with BPF, where symbol resolution happens via kallsyms, not via the default vmlinux ELF symtabs (Arnaldo Carvalho de Melo)

- Support 'srccode' output field in 'perf script' (Andi Kleen)

- Introduce basic 'perf annotation' support for the ARC architecture (Eugeniy Paltsev)

- Compute and display average IPC and IPC coverage per symbol in 'perf annotate' and 'perf report' (Jin Yao)

- Make 'perf top' use ordered_events and process histograms in a separate thread (Jiri Olsa)

- Make 'perf trace' use ordered_events (Jiri Olsa); a simplified model of this scheme is sketched after this message

- Add support for ETMv3 and PTMv1.1 decoding in cs-etm (Mathieu Poirier)

- Support ARM A32/T32 instruction sets in CoreSight trace (cs-etm) (Robert Walker)

- Fix 'perf stat' shadow stats for clock events (Ravi Bangoria)

- Remove needless rb_tree extra indirection from map__find() (Eric Saint-Etienne)

- Fix CSV mode column output for non-cgroup events in 'perf stat' (Stephane Eranian)

- Add sanity check to libtraceevent's is_timestamp_in_us() (Tzvetomir Stoyanov)

- Use ERR_CAST instead of ERR_PTR(PTR_ERR()) (Wen Yang)

- Fix Load_Miss_Real_Latency on SKL/SKX intel vendor event files (Andi Kleen)

- strncpy() fixes triggered by new warnings on gcc 8.2.0 (Arnaldo Carvalho de Melo)

- Handle the older 'nr' field of tracefs syscall tracepoints, since renamed to '__syscall_nr', so that 'perf trace' works on older kernels (Arnaldo Carvalho de Melo)

- Give a better hint about the devel package for libssl (Arnaldo Carvalho de Melo)

- Fix the 'perf trace' build in architectures lacking an explicit mmap.h file (Arnaldo Carvalho de Melo)

- Disable breakpoint tests for 32-bit ARM (Florian Fainelli)

- Fix typos all over the place, mostly in comments, but also in some debug messages and JSON files (Ingo Molnar)

- Allow specifying proc-map-timeout in the config file (Mark Drayton)

- Fix mmap_flags table generation script (Sihyeon Jang)

- Fix 'size' parameter to snprintf in the 'perf config' code (Sihyeon Jang)

- More libtraceevent renames to make it a proper library (Tzvetomir Stoyanov)

- Implement new API tep_get_ref() in libtraceevent (Tzvetomir Stoyanov)

- Add support for pkg-config in libtraceevent (Tzvetomir Stoyanov)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
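The 'perf trace' ordered_events conversion is the part of this pull that lands in builtin-trace.c, in the diff below: events read from the per-CPU mmap ring buffers are no longer handled immediately but queued by timestamp and delivered in time order, with a one-second reordering window (see trace__flush_ordered_events() in the diff). Below is a minimal, self-contained model of that scheme; the toy_queue names are invented for illustration and stand in for perf's internal ordered_events machinery.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL
typedef uint64_t u64;

/*
 * Toy stand-in for perf's ordered_events: queue timestamps as they
 * arrive out of order, deliver them sorted once they age past the
 * newest timestamp seen minus a one-second window, mirroring
 * trace__deliver_ordered_event()/trace__flush_ordered_events().
 */
struct toy_queue {
	u64 ts[64];
	int n;
};

static int cmp_u64(const void *a, const void *b)
{
	u64 x = *(const u64 *)a, y = *(const u64 *)b;
	return x < y ? -1 : x > y;
}

static void toy_queue__queue(struct toy_queue *q, u64 ts)
{
	q->ts[q->n++] = ts;
}

/* Deliver (print) every queued event older than 'flush', keep the rest */
static void toy_queue__flush_time(struct toy_queue *q, u64 flush)
{
	int i, kept = 0;

	qsort(q->ts, q->n, sizeof(u64), cmp_u64);
	for (i = 0; i < q->n; i++) {
		if (q->ts[i] < flush)
			printf("deliver event @ %llu ns\n",
			       (unsigned long long)q->ts[i]);
		else
			q->ts[kept++] = q->ts[i];
	}
	q->n = kept;
}

int main(void)
{
	/* Timestamps as they might pop out of different ring buffers */
	u64 arrivals[] = { 30, 10, 20, 2500000000ULL, 40 };
	struct toy_queue q = { .n = 0 };
	u64 last = 0;
	size_t i;

	for (i = 0; i < sizeof(arrivals) / sizeof(arrivals[0]); i++) {
		toy_queue__queue(&q, arrivals[i]);
		if (arrivals[i] > last)
			last = arrivals[i];
		/* Same windowing rule as trace__flush_ordered_events() */
		if (last > NSEC_PER_SEC)
			toy_queue__flush_time(&q, last - NSEC_PER_SEC);
	}
	toy_queue__flush_time(&q, UINT64_MAX);	/* final flush, as at out_disable */
	return 0;
}

This is only a model: the real API (ordered_events__init(), ordered_events__queue(), ordered_events__flush_time()) lives in tools/perf/util/ordered-events.c, and the copy-on-queue behavior enabled at the bottom of the diff is what lets perf_mmap__consume() release ring-buffer space before the queued event is delivered.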
Diffstat (limited to 'tools/perf/builtin-trace.c')
-rw-r--r--	tools/perf/builtin-trace.c	87
1 file changed, 72 insertions(+), 15 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8e3c3f74a3a4..366ec3c8f580 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -127,6 +127,10 @@ struct trace {
bool force;
bool vfs_getname;
int trace_pgfaults;
+ struct {
+ struct ordered_events data;
+ u64 last;
+ } oe;
};
struct tp_field {
@@ -258,7 +262,8 @@ static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
if (evsel->priv != NULL) {
- if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
+ if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
+ perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
goto out_delete;
return 0;
}
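The '&&' chain above is easy to misread: perf_evsel__init_tp_uint_field() returns 0 on success, so the lookup of the older 'nr' field name only runs when '__syscall_nr' is absent, and the error path is taken only when both fail. A standalone sketch of the same pattern, with a hypothetical lookup() standing in for the real helper:

#include <stdio.h>
#include <string.h>

/*
 * Hypothetical stand-in for perf_evsel__init_tp_uint_field():
 * returns 0 on success, non-zero when the field is missing from
 * the tracepoint format.
 */
static int lookup(const char *field_in_format, const char *wanted)
{
	return strcmp(field_in_format, wanted) != 0;
}

int main(void)
{
	const char *field = "nr";	/* as on kernels predating the rename */

	if (lookup(field, "__syscall_nr") && lookup(field, "nr")) {
		puts("neither field found: take the out_delete path");
		return 1;
	}
	puts("syscall number field resolved");
	return 0;
}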
@@ -885,7 +890,7 @@ static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
* args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
*/
struct syscall {
- struct tep_event_format *tp_format;
+ struct tep_event *tp_format;
int nr_args;
int args_size;
bool is_exit;
@@ -1264,7 +1269,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
evlist->threads, trace__tool_process, false,
- trace->opts.proc_map_timeout, 1);
+ 1);
out:
if (err)
symbol__exit();
@@ -2636,6 +2641,57 @@ static int trace__set_filter_pids(struct trace *trace)
return err;
}
+static int trace__deliver_event(struct trace *trace, union perf_event *event)
+{
+ struct perf_evlist *evlist = trace->evlist;
+ struct perf_sample sample;
+ int err;
+
+ err = perf_evlist__parse_sample(evlist, event, &sample);
+ if (err)
+ fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
+ else
+ trace__handle_event(trace, event, &sample);
+
+ return 0;
+}
+
+static int trace__flush_ordered_events(struct trace *trace)
+{
+ u64 first = ordered_events__first_time(&trace->oe.data);
+ u64 flush = trace->oe.last - NSEC_PER_SEC;
+
+ /* Is there something to flush? */
+ if (first && first < flush)
+ return ordered_events__flush_time(&trace->oe.data, flush);
+
+ return 0;
+}
+
+static int trace__deliver_ordered_event(struct trace *trace, union perf_event *event)
+{
+ struct perf_evlist *evlist = trace->evlist;
+ int err;
+
+ err = perf_evlist__parse_sample_timestamp(evlist, event, &trace->oe.last);
+ if (err && err != -1)
+ return err;
+
+ err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
+ if (err)
+ return err;
+
+ return trace__flush_ordered_events(trace);
+}
+
+static int ordered_events__deliver_event(struct ordered_events *oe,
+ struct ordered_event *event)
+{
+ struct trace *trace = container_of(oe, struct trace, oe.data);
+
+ return trace__deliver_event(trace, event->event);
+}
+
static int trace__run(struct trace *trace, int argc, const char **argv)
{
struct perf_evlist *evlist = trace->evlist;
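ordered_events__deliver_event() in the hunk above receives only the embedded ordered_events pointer back from the library and uses container_of() to recover the owning struct trace. A minimal standalone illustration of that pattern follows; the simplified macro omits the type checking the kernel's container_of() performs, and struct trace_like is an invented stand-in mirroring the 'oe' member added in this diff.

#include <stdio.h>
#include <stddef.h>

/*
 * Simplified container_of(): recover the enclosing struct from a
 * pointer to one of its members, via the member's compile-time offset.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ordered_events { int dummy; };

/* Shape mirrors the 'oe' member added to struct trace in this diff */
struct trace_like {
	unsigned long nr_events;
	struct {
		struct ordered_events data;
		unsigned long long last;
	} oe;
};

int main(void)
{
	struct trace_like t = { .nr_events = 42 };
	struct ordered_events *oe = &t.oe.data;
	struct trace_like *owner;

	/* The deliver callback receives only 'oe'; walk back to the owner */
	owner = container_of(oe, struct trace_like, oe.data);
	printf("nr_events = %lu\n", owner->nr_events);
	return 0;
}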
@@ -2782,7 +2838,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
* Now that we already used evsel->attr to ask the kernel to setup the
* events, lets reuse evsel->attr.sample_max_stack as the limit in
* trace__resolve_callchain(), allowing per-event max-stack settings
- * to override an explicitely set --max-stack global setting.
+ * to override an explicitly set --max-stack global setting.
*/
evlist__for_each_entry(evlist, evsel) {
if (evsel__has_callchain(evsel) &&
@@ -2801,18 +2857,12 @@ again:
continue;
while ((event = perf_mmap__read_event(md)) != NULL) {
- struct perf_sample sample;
-
++trace->nr_events;
- err = perf_evlist__parse_sample(evlist, event, &sample);
- if (err) {
- fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
- goto next_event;
- }
+ err = trace__deliver_ordered_event(trace, event);
+ if (err)
+ goto out_disable;
- trace__handle_event(trace, event, &sample);
-next_event:
perf_mmap__consume(md);
if (interrupted)
@@ -2834,6 +2884,9 @@ next_event:
draining = true;
goto again;
+ } else {
+ if (trace__flush_ordered_events(trace))
+ goto out_disable;
}
} else {
goto again;
@@ -2844,6 +2897,8 @@ out_disable:
perf_evlist__disable(evlist);
+ ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
+
if (!err) {
if (trace->summary)
trace__fprintf_thread_summary(trace, trace->output);
@@ -3393,7 +3448,6 @@ int cmd_trace(int argc, const char **argv)
.user_interval = ULLONG_MAX,
.no_buffering = true,
.mmap_pages = UINT_MAX,
- .proc_map_timeout = 500,
},
.output = stderr,
.show_comm = true,
@@ -3464,7 +3518,7 @@ int cmd_trace(int argc, const char **argv)
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
- OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
+ OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
trace__parse_cgroups),
@@ -3555,6 +3609,9 @@ int cmd_trace(int argc, const char **argv)
}
}
+ ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
+ ordered_events__set_copy_on_queue(&trace.oe.data, true);
+
/*
* If we are augmenting syscalls, then combine what we put in the
* __augmented_syscalls__ BPF map with what is in the