From bd1a5c849bdcc5c89e4a6a18216cd2b9a7a8a78f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:53 -0400 Subject: tracing: Ftrace dynamic ftrace_event_call support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dynamic ftrace_event_call support to ftrace. Trace engines can add new ftrace_event_call to ftrace on the fly. Each operator function of the call takes an ftrace_event_call data structure as an argument, because these functions may be shared among several ftrace_event_calls. Changes from v13: - Define remove_subsystem_dir() always (revirt a2ca5e03), because trace_remove_event_call() uses it. - Modify syscall tracer because of ftrace_event_call change. [fweisbec@gmail.com: Fixed conflict against latest tracing/core] Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Przemysław Pawełczyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203453.31965.71901.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 16 ++++++++-------- include/trace/syscall.h | 11 +++++++---- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 360a77ad79e..f2bd7a8f8e8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -434,7 +434,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * event_trace_printk(_RET_IP_, ": " ); * } * - * static int ftrace_reg_event_(void) + * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -445,7 +445,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_event_); * } @@ -478,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * trace_current_buffer_unlock_commit(event, irq_flags, pc); * } * - * static int ftrace_raw_reg_event_(void) + * static int ftrace_raw_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -489,7 +489,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_raw_event_); * } @@ -498,7 +498,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(void) + * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) * { * int id; * @@ -592,7 +592,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void *ptr) \ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ int ret; \ \ @@ -603,7 +603,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void *ptr) \ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -612,7 +612,7 @@ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ }; \ \ -static int ftrace_raw_init_event_##call(void) \ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ { \ int id; \ \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae..e290b86f616 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; -extern int reg_event_syscall_enter(void *ptr); -extern void unreg_event_syscall_enter(void *ptr); -extern int reg_event_syscall_exit(void *ptr); -extern void unreg_event_syscall_exit(void *ptr); + extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); +extern int reg_event_syscall_enter(struct ftrace_event_call *call); +extern void unreg_event_syscall_enter(struct ftrace_event_call *call); +extern int reg_event_syscall_exit(struct ftrace_event_call *call); +extern void unreg_event_syscall_exit(struct ftrace_event_call *call); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -- cgit v1.2.3 From 26a50744b21fff65bd754874072857bee8967f4d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:50 -0500 Subject: tracing/events: Add 'signed' field to format files The sign info used for filters in the kernel is also useful to applications that process the trace stream. Add it to the format files and make it available to userspace. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 15 +++++++++------ kernel/trace/ring_buffer.c | 15 +++++++++------ kernel/trace/trace_events.c | 24 ++++++++++++------------ kernel/trace/trace_export.c | 25 ++++++++++++++----------- kernel/trace/trace_syscalls.c | 20 +++++++++++++------- tools/perf/util/trace-event-parse.c | 24 ++++++++++++++++++++++++ tools/perf/util/trace-event.h | 1 + 7 files changed, 82 insertions(+), 42 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e18..c9bbcab95fb 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -120,9 +120,10 @@ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; @@ -132,19 +133,21 @@ #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item)); \ + (unsigned int)sizeof(field.__data_loc_##item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d4ff0197054..e43c928356e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s) int ret; ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\n", - (unsigned int)sizeof(field.time_stamp)); + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit)); + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE); + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d128f65778e..cf3cabf6ce1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -507,7 +507,7 @@ extern char *__bad_type_size(void); #define FIELD(type, name) \ sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name) + sizeof(field.name), is_signed_type(type) static int trace_write_header(struct trace_seq *s) { @@ -515,17 +515,17 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, lock_depth)); + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\n", + FIELD(unsigned short, type), + FIELD(unsigned char, flags), + FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, lock_depth)); } static ssize_t diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9753fcc61bc..31da218ee10 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __field_desc #define __field_desc(type, container, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __array_desc #define __array_desc(type, container, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\n", \ - offsetof(typeof(field), item)); \ + "offset:%zu;\tsize:0;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 527e17eae57..d99abc427c3 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -103,7 +103,8 @@ extern char *__bad_type_size(void); #define SYSCALL_FIELD(type, name) \ sizeof(type) != sizeof(trace.name) ? \ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) + #type, #name, offsetof(typeof(trace), name), \ + sizeof(trace.name), is_signed_type(type) int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { @@ -120,7 +121,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) if (!entry) return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); if (!ret) return 0; @@ -130,8 +132,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) entry->args[i]); if (!ret) return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, - sizeof(unsigned long)); + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" + "\tsigned:%u;\n", offset, + sizeof(unsigned long), + is_signed_type(unsigned long)); if (!ret) return 0; offset += sizeof(unsigned long); @@ -163,8 +167,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) struct syscall_trace_exit trace; ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr), SYSCALL_FIELD(long, ret)); if (!ret) @@ -212,7 +218,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, + ret = trace_define_field(call, SYSCALL_FIELD(long, ret), FILTER_OTHER); return ret; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 55b41b9e383..be8412d699a 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -894,6 +894,21 @@ static int event_read_fields(struct event *event, struct format_field **fields) field->size = strtoul(token, NULL, 0); free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + goto fail_expect; + + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, (char *)":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) goto fail_expect; @@ -2843,6 +2858,15 @@ static void parse_header_field(char *type, return; *size = atoi(token); free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + return; + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + return; + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + return; + free_token(token); if (read_expected(EVENT_OP, (char *)";") < 0) return; if (read_expect_type(EVENT_NEWLINE, &token) < 0) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 162c3e6deb9..00b440df66d 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -26,6 +26,7 @@ enum { enum format_flags { FIELD_IS_ARRAY = 1, FIELD_IS_POINTER = 2, + FIELD_IS_SIGNED = 4, }; struct format_field { -- cgit v1.2.3 From 8968f9d3dc23d9a1821d97c6f11e72a59382e56c Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 13 Oct 2009 16:19:41 +0900 Subject: perf_event, x86, mce: Use TRACE_EVENT() for MCE logging This approach is the first baby step towards solving many of the structural problems the x86 MCE logging code is having today: - It has a private ring-buffer implementation that has a number of limitations and has been historically fragile and buggy. - It is using a quirky /dev/mcelog ioctl driven ABI that is MCE specific. /dev/mcelog is not part of any larger logging framework and hence has remained on the fringes for many years. - The MCE logging code is still very unclean partly due to its ABI limitations. Fields are being reused for multiple purposes, and the whole message structure is limited and x86 specific to begin with. All in one, the x86 tree would like to move away from this private implementation of an event logging facility to a broader framework. By using perf events we gain the following advantages: - Multiple user-space agents can access MCE events. We can have an mcelog daemon running but also a system-wide tracer capturing important events in flight-recorder mode. - Sampling support: the kernel and the user-space call-chain of MCE events can be stored and analyzed as well. This way actual patterns of bad behavior can be matched to precisely what kind of activity happened in the kernel (and/or in the app) around that moment in time. - Coupling with other hardware and software events: the PMU can track a number of other anomalies - monitoring software might chose to monitor those plus the MCE events as well - in one coherent stream of events. - Discovery of MCE sources - tracepoints are enumerated and tools can act upon the existence (or non-existence) of various channels of MCE information. - Filtering support: we just subscribe to and act upon the events we are interested in. Then even on a per event source basis there's in-kernel filter expressions available that can restrict the amount of data that hits the event channel. - Arbitrary deep per cpu buffering of events - we can buffer 32 entries or we can buffer as much as we want, as long as we have the RAM. - An NMI-safe ring-buffer implementation - mappable to user-space. - Built-in support for timestamping of events, PID markers, CPU markers, etc. - A rich ABI accessible over system call interface. Per cpu, per task and per workload monitoring of MCE events can be done this way. The ABI itself has a nice, meaningful structure. - Extensible ABI: new fields can be added without breaking tooling. New tracepoints can be added as the hardware side evolves. There's various parsers that can be used. - Lots of scheduling/buffering/batching modes of operandi for MCE events. poll() support. mmap() support. read() support. You name it. - Rich tooling support: even without any MCE specific extensions added the 'perf' tool today offers various views of MCE data: perf report, perf stat, perf trace can all be used to view logged MCE events and perhaps correlate them to certain user-space usage patterns. But it can be used directly as well, for user-space agents and policy action in mcelog, etc. With this we hope to achieve significant code cleanup and feature improvements in the MCE code, and we hope to be able to drop the /dev/mcelog facility in the end. This patch is just a plain dumb dump of mce_log() records to the tracepoints / perf events framework - a first proof of concept step. Signed-off-by: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: <4AD42A0D.7050104@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++ include/trace/events/mce.h | 69 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 include/trace/events/mce.h (limited to 'include/trace') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d..39caea3d8bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -141,6 +144,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 00000000000..7eee77895cb --- /dev/null +++ b/include/trace/events/mce.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mce + +#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MCE_H + +#include +#include +#include + +TRACE_EVENT(mce_record, + + TP_PROTO(struct mce *m), + + TP_ARGS(m), + + TP_STRUCT__entry( + __field( u64, mcgcap ) + __field( u64, mcgstatus ) + __field( u8, bank ) + __field( u64, status ) + __field( u64, addr ) + __field( u64, misc ) + __field( u64, ip ) + __field( u8, cs ) + __field( u64, tsc ) + __field( u64, walltime ) + __field( u32, cpu ) + __field( u32, cpuid ) + __field( u32, apicid ) + __field( u32, socketid ) + __field( u8, cpuvendor ) + ), + + TP_fast_assign( + __entry->mcgcap = m->mcgcap; + __entry->mcgstatus = m->mcgstatus; + __entry->bank = m->bank; + __entry->status = m->status; + __entry->addr = m->addr; + __entry->misc = m->misc; + __entry->ip = m->ip; + __entry->cs = m->cs; + __entry->tsc = m->tsc; + __entry->walltime = m->time; + __entry->cpu = m->extcpu; + __entry->cpuid = m->cpuid; + __entry->apicid = m->apicid; + __entry->socketid = m->socketid; + __entry->cpuvendor = m->cpuvendor; + ), + + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + __entry->cpu, + __entry->mcgcap, __entry->mcgstatus, + __entry->bank, __entry->status, + __entry->addr, __entry->misc, + __entry->cs, __entry->ip, + __entry->tsc, + __entry->cpuvendor, __entry->cpuid, + __entry->walltime, + __entry->socketid, + __entry->apicid) +); + +#endif /* _TRACE_MCE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 434a83c3fbb951908a3a52040f7f0e0b8ba00dd0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 11:50:39 +0200 Subject: events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan Cc: Tom Zanussi Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/events/bkl.h | 18 +++++----- include/trace/events/irq.h | 8 ++--- include/trace/events/power.h | 2 -- include/trace/events/sched.h | 44 ++++++++++++------------ include/trace/events/timer.h | 79 ++++++++++++++++++++++---------------------- 5 files changed, 74 insertions(+), 77 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h index 8abd620a490..1af72dc2427 100644 --- a/include/trace/events/bkl.h +++ b/include/trace/events/bkl.h @@ -13,7 +13,7 @@ TRACE_EVENT(lock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field( int, lock_depth ) + __field( int, depth ) __field_ext( const char *, func, FILTER_PTR_STRING ) __field_ext( const char *, file, FILTER_PTR_STRING ) __field( int, line ) @@ -21,13 +21,13 @@ TRACE_EVENT(lock_kernel, TP_fast_assign( /* We want to record the lock_depth after lock is acquired */ - __entry->lock_depth = current->lock_depth + 1; + __entry->depth = current->lock_depth + 1; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); @@ -38,20 +38,20 @@ TRACE_EVENT(unlock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field(int, lock_depth) - __field(const char *, func) - __field(const char *, file) - __field(int, line) + __field(int, depth ) + __field(const char *, func ) + __field(const char *, file ) + __field(int, line ) ), TP_fast_assign( - __entry->lock_depth = current->lock_depth; + __entry->depth = current->lock_depth; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index b89f9db4a40..dcfcd440762 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry, __assign_str(name, action->name); ), - TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) + TP_printk("irq=%d name=%s", __entry->irq, __get_str(name)) ); /** @@ -78,7 +78,7 @@ TRACE_EVENT(irq_handler_exit, __entry->ret = ret; ), - TP_printk("irq=%d return=%s", + TP_printk("irq=%d ret=%s", __entry->irq, __entry->ret ? "handled" : "unhandled") ); @@ -107,7 +107,7 @@ TRACE_EVENT(softirq_entry, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); @@ -136,7 +136,7 @@ TRACE_EVENT(softirq_exit, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index ea6d579261a..9bb96e5a284 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,8 +16,6 @@ enum { }; #endif - - TRACE_EVENT(power_start, TP_PROTO(unsigned int type, unsigned int state), diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4069c43f418..b50b9856c59 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop, __entry->pid = t->pid; ), - TP_printk("task %s:%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* @@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret, __entry->ret = ret; ), - TP_printk("ret %d", __entry->ret) + TP_printk("ret=%d", __entry->ret) ); /* @@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -102,12 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -127,7 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -135,12 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -176,7 +176,7 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state ? __print_flags(__entry->prev_state, "|", @@ -211,7 +211,7 @@ TRACE_EVENT(sched_migrate_task, __entry->dest_cpu = dest_cpu; ), - TP_printk("task %s:%d [%d] from: %d to: %d", + TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); @@ -237,7 +237,7 @@ TRACE_EVENT(sched_process_free, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -262,7 +262,7 @@ TRACE_EVENT(sched_process_exit, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -287,7 +287,7 @@ TRACE_EVENT(sched_process_wait, __entry->prio = current->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -314,7 +314,7 @@ TRACE_EVENT(sched_process_fork, __entry->child_pid = child->pid; ), - TP_printk("parent %s:%d child %s:%d", + TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); @@ -340,7 +340,7 @@ TRACE_EVENT(sched_signal_send, __entry->sig = sig; ), - TP_printk("sig: %d task %s:%d", + TP_printk("sig=%d comm=%s pid=%d", __entry->sig, __entry->comm, __entry->pid) ); @@ -374,7 +374,7 @@ TRACE_EVENT(sched_stat_wait, __perf_count(delay); ), - TP_printk("task: %s:%d wait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -406,7 +406,7 @@ TRACE_EVENT(sched_stat_runtime, __perf_count(runtime); ), - TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) @@ -437,7 +437,7 @@ TRACE_EVENT(sched_stat_sleep, __perf_count(delay); ), - TP_printk("task: %s:%d sleep: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -467,7 +467,7 @@ TRACE_EVENT(sched_stat_iowait, __perf_count(delay); ), - TP_printk("task: %s:%d iowait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1844c48d640..e5ce87a0498 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -26,7 +26,7 @@ TRACE_EVENT(timer_init, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -54,7 +54,7 @@ TRACE_EVENT(timer_start, __entry->now = jiffies; ), - TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now) ); @@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry, __entry->now = jiffies; ), - TP_printk("timer %p: now %lu", __entry->timer, __entry->now) + TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) ); /** @@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel, */ TRACE_EVENT(hrtimer_init, - TP_PROTO(struct hrtimer *timer, clockid_t clockid, + TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), - TP_ARGS(timer, clockid, mode), + TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), - TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, __entry->clockid == CLOCK_REALTIME ? "CLOCK_REALTIME" : "CLOCK_MONOTONIC", __entry->mode == HRTIMER_MODE_ABS ? @@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) ), TP_fast_assign( - __entry->timer = timer; - __entry->function = timer->function; - __entry->expires = hrtimer_get_expires(timer).tv64; - __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + __entry->hrtimer = hrtimer; + __entry->function = hrtimer->function; + __entry->expires = hrtimer_get_expires(hrtimer).tv64; + __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; ), - TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", - __entry->timer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", + __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->expires }), (unsigned long long)ktime_to_ns((ktime_t) { @@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start, */ TRACE_EVENT(hrtimer_expire_entry, - TP_PROTO(struct hrtimer *timer, ktime_t *now), + TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), - TP_ARGS(timer, now), + TP_ARGS(hrtimer, now), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( s64, now ) ), TP_fast_assign( - __entry->timer = timer; - __entry->now = now->tv64; + __entry->hrtimer = hrtimer; + __entry->now = now->tv64; ), - TP_printk("hrtimer %p, now %llu", __entry->timer, - (unsigned long long)ktime_to_ns((ktime_t) { - .tv64 = __entry->now })) + TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); /** @@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry, */ TRACE_EVENT(hrtimer_expire_exit, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled - * @timer: pointer to struct hrtimer + * @hrtimer: pointer to struct hrtimer */ TRACE_EVENT(hrtimer_cancel, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** @@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", __entry->which, __entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) @@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which %d, pid %d, now %lu", __entry->which, + TP_printk("which=%d pid=%d now=%lu", __entry->which, (int) __entry->pid, __entry->now) ); -- cgit v1.2.3 From 444a2a3bcd6d5bed5c823136f68fcc93c0fe283f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Nov 2009 04:13:05 +0100 Subject: tracing, perf_events: Protect the buffer from recursion in perf While tracing using events with perf, if one enables the lockdep:lock_acquire event, it will infect every other perf trace events. Basically, you can enable whatever set of trace events through perf but if this event is part of the set, the only result we can get is a long list of lock_acquire events of rcu read lock, and only that. This is because of a recursion inside perf. 1) When a trace event is triggered, it will fill a per cpu buffer and submit it to perf. 2) Perf will commit this event but will also protect some data using rcu_read_lock 3) A recursion appears: rcu_read_lock triggers a lock_acquire event that will fill the per cpu event and then submit the buffer to perf. 4) Perf detects a recursion and ignores it 5) Perf continues its work on the previous event, but its buffer has been overwritten by the lock_acquire event, it has then been turned into a lock_acquire event of rcu read lock Such scenario also happens with lock_release with rcu_read_unlock(). We could turn the rcu_read_lock() into __rcu_read_lock() to drop the lock debugging from perf fast path, but that would make us lose the rcu debugging and that doesn't prevent from other possible kind of recursion from perf in the future. This patch adds a recursion protection based on a counter on the perf trace per cpu buffers to solve the problem. -v2: Fixed lost whitespace, added reviewed-by tag Signed-off-by: Frederic Weisbecker Reviewed-by: Masami Hiramatsu Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 +++++-- include/trace/ftrace.h | 39 ++++++++++++++++++++++------- kernel/trace/trace_event_profile.c | 41 ++++++++++++++----------------- kernel/trace/trace_kprobe.c | 50 ++++++++++++++++++++++++++++++++------ kernel/trace/trace_syscalls.c | 44 +++++++++++++++++++++++++++------ 5 files changed, 133 insertions(+), 50 deletions(-) (limited to 'include/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f7b47c33670..43360c1d8f7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,8 +137,13 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *trace_profile_buf; -extern char *trace_profile_buf_nmi; +struct perf_trace_buf { + char buf[FTRACE_MAX_PROFILE_SIZE]; + int recursion; +}; + +extern struct perf_trace_buf *perf_trace_buf; +extern struct perf_trace_buf *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a7f94609412..4945d1c9986 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_event_call *event_call = &event_; * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; + * struct perf_trace_buf *trace_buf; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; * struct trace_entry *ent; @@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * raw_data = rcu_dereference(trace_profile_buf_nmi); + * trace_buf = rcu_dereference(perf_trace_buf_nmi); * else - * raw_data = rcu_dereference(trace_profile_buf); + * trace_buf = rcu_dereference(perf_trace_buf); * - * if (!raw_data) + * if (!trace_buf) * goto end; * - * raw_data = per_cpu_ptr(raw_data, __cpu); + * trace_buf = per_cpu_ptr(trace_buf, __cpu); + * + * // Avoid recursion from perf that could mess up the buffer + * if (trace_buf->recursion++) + * goto end_recursion; + * + * raw_data = trace_buf->buf; + * + * // Make recursion update visible before entering perf_tp_event + * // so that we protect from perf recursions. + * + * barrier(); * * //zero dead bytes from alignment to avoid stack leak to userspace: * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; @@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tp_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ + struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ @@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ - raw_data = rcu_dereference(trace_profile_buf_nmi); \ + trace_buf = rcu_dereference(perf_trace_buf_nmi); \ else \ - raw_data = rcu_dereference(trace_profile_buf); \ + trace_buf = rcu_dereference(perf_trace_buf); \ \ - if (!raw_data) \ + if (!trace_buf) \ goto end; \ \ - raw_data = per_cpu_ptr(raw_data, __cpu); \ + trace_buf = per_cpu_ptr(trace_buf, __cpu); \ + if (trace_buf->recursion++) \ + goto end_recursion; \ + \ + barrier(); \ + \ + raw_data = trace_buf->buf; \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ +end_recursion: \ + trace_buf->recursion--; \ end: \ local_irq_restore(irq_flags); \ \ diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index c9f687ab0d4..e0d351b01f5 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -8,41 +8,36 @@ #include #include "trace.h" -/* - * We can't use a size but a type in alloc_percpu() - * So let's create a dummy type that matches the desired size - */ -typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; -char *trace_profile_buf; -EXPORT_SYMBOL_GPL(trace_profile_buf); +struct perf_trace_buf *perf_trace_buf; +EXPORT_SYMBOL_GPL(perf_trace_buf); -char *trace_profile_buf_nmi; -EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); +struct perf_trace_buf *perf_trace_buf_nmi; +EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - char *buf; + struct perf_trace_buf *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf; - rcu_assign_pointer(trace_profile_buf, buf); + rcu_assign_pointer(perf_trace_buf, buf); - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, buf); + rcu_assign_pointer(perf_trace_buf_nmi, buf); } ret = event->profile_enable(event); @@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) fail_buf_nmi: if (!total_profile_count) { - free_percpu(trace_profile_buf_nmi); - free_percpu(trace_profile_buf); - trace_profile_buf_nmi = NULL; - trace_profile_buf = NULL; + free_percpu(perf_trace_buf_nmi); + free_percpu(perf_trace_buf); + perf_trace_buf_nmi = NULL; + perf_trace_buf = NULL; } fail_buf: atomic_dec(&event->profile_count); @@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - char *buf, *nmi_buf; + struct perf_trace_buf *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; @@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) event->profile_disable(event); if (!--total_profile_count) { - buf = trace_profile_buf; - rcu_assign_pointer(trace_profile_buf, NULL); + buf = perf_trace_buf; + rcu_assign_pointer(perf_trace_buf, NULL); - nmi_buf = trace_profile_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, NULL); + nmi_buf = perf_trace_buf_nmi; + rcu_assign_pointer(perf_trace_buf_nmi, NULL); /* * Ensure every events in profiling have finished before diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index cf17a6694f3..3696476f307 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kprobe_trace_entry *)raw_data; @@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } @@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kretprobe_trace_entry *)raw_data; @@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 58b8e537076..51213b0aa81 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,6 +477,7 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; + struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; char *raw_data; @@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } @@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; char *raw_data; @@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } -- cgit v1.2.3 From 67178767b936fb47a3a5e88097cff41ccbda7acb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 10:06:34 +0100 Subject: tracing: Rename 'lockdep' event subsystem into 'lock' Lockdep events subsystem gathers various locking related events such as a request, release, contention or acquisition of a lock. The name of this event subsystem is a bit of a misnomer since these events are not quite related to lockdep but more generally to locking, ie: these events are not reporting lock dependencies or possible deadlock scenario but pure locking events. Hence this rename. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Acked-by: Hitoshi Mitake Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <1258103194-843-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/events/lock.h | 96 ++++++++++++++++++++++++++++++++++++++++++ include/trace/events/lockdep.h | 96 ------------------------------------------ kernel/lockdep.c | 2 +- 3 files changed, 97 insertions(+), 97 deletions(-) create mode 100644 include/trace/events/lock.h delete mode 100644 include/trace/events/lockdep.h (limited to 'include/trace') diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h new file mode 100644 index 00000000000..a870ba125aa --- /dev/null +++ b/include/trace/events/lock.h @@ -0,0 +1,96 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCK_H + +#include +#include + +#ifdef CONFIG_LOCKDEP + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + + TP_ARGS(lock, nested, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +#ifdef CONFIG_LOCK_STAT + +TRACE_EVENT(lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __string(name, lock->name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __assign_str(name, lock->name); + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCK_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h deleted file mode 100644 index bcf1d209a00..00000000000 --- a/include/trace/events/lockdep.h +++ /dev/null @@ -1,96 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#ifdef CONFIG_LOCKDEP - -TRACE_EVENT(lock_acquire, - - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - - TP_STRUCT__entry( - __field(unsigned int, flags) - __string(name, lock->name) - ), - - TP_fast_assign( - __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); - __assign_str(name, lock->name); - ), - - TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", - (__entry->flags & 2) ? "read " : "", - __get_str(name)) -); - -TRACE_EVENT(lock_release, - - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - - TP_ARGS(lock, nested, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -#ifdef CONFIG_LOCK_STAT - -TRACE_EVENT(lock_contended, - - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - - TP_ARGS(lock, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __string(name, lock->name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __assign_str(name, lock->name); - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9af56723c09..f5dcd36d315 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -49,7 +49,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; -- cgit v1.2.3 From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:26:55 +0100 Subject: tracing: Use the perf recursion protection from trace event When we commit a trace to perf, we first check if we are recursing in the same buffer so that we don't mess-up the buffer with a recursing trace. But later on, we do the same check from perf to avoid commit recursion. The recursion check is desired early before we touch the buffer but we want to do this check only once. Then export the recursion protection from perf and use it from the trace events before submitting a trace. v2: Put appropriate Reported-by tag Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 ++--- include/linux/perf_event.h | 4 +++ include/trace/ftrace.h | 23 +++++++------ kernel/perf_event.c | 68 +++++++++++++++++++++++++------------- kernel/trace/trace_event_profile.c | 14 ++++---- kernel/trace/trace_kprobe.c | 48 ++++++++++----------------- kernel/trace/trace_syscalls.c | 47 ++++++++++---------------- 7 files changed, 106 insertions(+), 107 deletions(-) (limited to 'include/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 43360c1d8f7..47bbdf9c38d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,13 +137,8 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -struct perf_trace_buf { - char buf[FTRACE_MAX_PROFILE_SIZE]; - int recursion; -}; - -extern struct perf_trace_buf *perf_trace_buf; -extern struct perf_trace_buf *perf_trace_buf_nmi; +extern char *perf_trace_buf; +extern char *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36fe89f7264..74e98b1d339 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(int **recursion); +extern void perf_swevent_put_recursion_context(int *recursion); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } +static int perf_swevent_get_recursion_context(int **recursion) { return -1; } +static void perf_swevent_put_recursion_context(int *recursion) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4945d1c9986..c222ef5238b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,16 +724,19 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + extern int perf_swevent_get_recursion_context(int **recursion); \ + extern void perf_swevent_put_recursion_context(int *recursion); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ - struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *trace_buf; \ char *raw_data; \ + int *recursion; \ int __cpu; \ int pc; \ \ @@ -749,6 +752,10 @@ static void ftrace_profile_##call(proto) \ return; \ \ local_irq_save(irq_flags); \ + \ + if (perf_swevent_get_recursion_context(&recursion)) \ + goto end_recursion; \ + \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ @@ -759,13 +766,7 @@ static void ftrace_profile_##call(proto) \ if (!trace_buf) \ goto end; \ \ - trace_buf = per_cpu_ptr(trace_buf, __cpu); \ - if (trace_buf->recursion++) \ - goto end_recursion; \ - \ - barrier(); \ - \ - raw_data = trace_buf->buf; \ + raw_data = per_cpu_ptr(trace_buf, __cpu); \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -780,9 +781,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end_recursion: \ - trace_buf->recursion--; \ -end: \ +end: \ + perf_swevent_put_recursion_context(recursion); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 718fa939b1a..aba82272230 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3880,34 +3880,42 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) +/* + * Must be called with preemption disabled + */ +int perf_swevent_get_recursion_context(int **recursion) { + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + if (in_nmi()) - return &cpuctx->recursion[3]; + *recursion = &cpuctx->recursion[3]; + else if (in_irq()) + *recursion = &cpuctx->recursion[2]; + else if (in_softirq()) + *recursion = &cpuctx->recursion[1]; + else + *recursion = &cpuctx->recursion[0]; - if (in_irq()) - return &cpuctx->recursion[2]; + if (**recursion) + return -1; - if (in_softirq()) - return &cpuctx->recursion[1]; + (**recursion)++; - return &cpuctx->recursion[0]; + return 0; } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +void perf_swevent_put_recursion_context(int *recursion) { - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swevent_recursion_context(cpuctx); - struct perf_event_context *ctx; - - if (*recursion) - goto out; + (*recursion)--; +} - (*recursion)++; - barrier(); +static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_event_context *ctx; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, @@ -3920,12 +3928,25 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (ctx) perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); rcu_read_unlock(); +} - barrier(); - (*recursion)--; +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + int *recursion; + + preempt_disable(); + + if (perf_swevent_get_recursion_context(&recursion)) + goto out; + + __do_perf_sw_event(type, event_id, nr, nmi, data, regs); + perf_swevent_put_recursion_context(recursion); out: - put_cpu_var(perf_cpu_context); + preempt_enable(); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4159,7 +4180,8 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, if (!regs) regs = task_pt_regs(current); - do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + /* Trace events already protected against recursion */ + __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index e0d351b01f5..d9c60f80aa0 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -9,31 +9,33 @@ #include "trace.h" -struct perf_trace_buf *perf_trace_buf; +char *perf_trace_buf; EXPORT_SYMBOL_GPL(perf_trace_buf); -struct perf_trace_buf *perf_trace_buf_nmi; +char *perf_trace_buf_nmi; EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); +typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; + /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf; + char *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf; rcu_assign_pointer(perf_trace_buf, buf); - buf = alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf_nmi; @@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf, *nmi_buf; + char *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3696476f307..22e6f68b05b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; char *raw_data; + int *recursion; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1227,6 +1228,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1237,18 +1242,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1263,9 +1257,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; @@ -1278,10 +1272,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; + int *recursion; char *raw_data; pc = preempt_count(); @@ -1297,6 +1292,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1307,18 +1306,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1334,9 +1322,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 51213b0aa81..0bb93487526 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,10 +477,11 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; - struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; + char *trace_buf; char *raw_data; + int *recursion; int syscall_nr; int size; int cpu; @@ -505,6 +506,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -515,18 +519,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -539,9 +532,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } @@ -588,10 +581,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; - struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; + char *trace_buf; char *raw_data; + int *recursion; int size; int cpu; @@ -617,6 +611,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -627,18 +625,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -652,9 +639,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } -- cgit v1.2.3 From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:29 +0100 Subject: perf_events: Undo some recursion damage Make perf_swevent_get_recursion_context return a context number and disable preemption. This could be used to remove the IRQ disable from the trace bit and index the per-cpu buffer with. Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <20091123103819.993226816@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 8 ++--- include/trace/ftrace.h | 17 ++++++----- kernel/perf_event.c | 71 +++++++++++++++++++------------------------ kernel/trace/trace_kprobe.c | 14 +++++---- kernel/trace/trace_syscalls.c | 14 +++++---- 5 files changed, 61 insertions(+), 63 deletions(-) (limited to 'include/trace') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 74e98b1d339..43adbd7f001 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -extern int perf_swevent_get_recursion_context(int **recursion); -extern void perf_swevent_put_recursion_context(int *recursion); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static int perf_swevent_get_recursion_context(int **recursion) { return -1; } -static void perf_swevent_put_recursion_context(int *recursion) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c222ef5238b..c3417c13e3e 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,8 +724,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(int **recursion); \ - extern void perf_swevent_put_recursion_context(int *recursion); \ + extern int perf_swevent_get_recursion_context(void); \ + extern void perf_swevent_put_recursion_context(int rctx); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ @@ -736,8 +736,8 @@ static void ftrace_profile_##call(proto) \ int __data_size; \ char *trace_buf; \ char *raw_data; \ - int *recursion; \ int __cpu; \ + int rctx; \ int pc; \ \ pc = preempt_count(); \ @@ -753,8 +753,9 @@ static void ftrace_profile_##call(proto) \ \ local_irq_save(irq_flags); \ \ - if (perf_swevent_get_recursion_context(&recursion)) \ - goto end_recursion; \ + rctx = perf_swevent_get_recursion_context(); \ + if (rctx < 0) \ + goto end_recursion; \ \ __cpu = smp_processor_id(); \ \ @@ -781,9 +782,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end: \ - perf_swevent_put_recursion_context(recursion); \ -end_recursion: \ +end: \ + perf_swevent_put_recursion_context(rctx); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 50f11b5f8c3..0b0d5f72fe7 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3869,45 +3869,50 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -/* - * Must be called with preemption disabled - */ -int perf_swevent_get_recursion_context(int **recursion) +int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int rctx; if (in_nmi()) - *recursion = &cpuctx->recursion[3]; + rctx = 3; else if (in_irq()) - *recursion = &cpuctx->recursion[2]; + rctx = 2; else if (in_softirq()) - *recursion = &cpuctx->recursion[1]; + rctx = 1; else - *recursion = &cpuctx->recursion[0]; + rctx = 0; - if (**recursion) + if (cpuctx->recursion[rctx]) { + put_cpu_var(perf_cpu_context); return -1; + } - (**recursion)++; + cpuctx->recursion[rctx]++; + barrier(); - return 0; + return rctx; } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); -void perf_swevent_put_recursion_context(int *recursion) +void perf_swevent_put_recursion_context(int rctx) { - (*recursion)--; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + barrier(); + cpuctx->recursion[rctx]++; + put_cpu_var(perf_cpu_context); } EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); -static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { + struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + cpuctx = &__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); @@ -3921,34 +3926,22 @@ static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, rcu_read_unlock(); } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - int *recursion; - - preempt_disable(); - - if (perf_swevent_get_recursion_context(&recursion)) - goto out; - - __do_perf_sw_event(type, event_id, nr, nmi, data, regs); - - perf_swevent_put_recursion_context(recursion); -out: - preempt_enable(); -} - void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; + int rctx; + + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + return; data.addr = addr; data.raw = NULL; do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); + + perf_swevent_put_recursion_context(rctx); } static void perf_swevent_read(struct perf_event *event) @@ -4172,7 +4165,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, regs = task_pt_regs(current); /* Trace events already protected against recursion */ - __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 22e6f68b05b..79ce6a2bd74 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1213,7 +1213,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, unsigned long irq_flags; char *trace_buf; char *raw_data; - int *recursion; + int rctx; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1229,7 +1229,8 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1258,7 +1259,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, perf_tp_event(call->id, entry->ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); @@ -1276,8 +1277,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, int size, __size, i, pc, __cpu; unsigned long irq_flags; char *trace_buf; - int *recursion; char *raw_data; + int rctx; pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); @@ -1293,7 +1294,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1323,7 +1325,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, perf_tp_event(call->id, entry->ret_ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 41b6dd963da..9189cbe8607 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -481,8 +481,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) unsigned long flags; char *trace_buf; char *raw_data; - int *recursion; int syscall_nr; + int rctx; int size; int cpu; @@ -506,7 +506,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -530,7 +531,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) perf_tp_event(sys_data->enter_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } @@ -582,7 +583,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) int syscall_nr; char *trace_buf; char *raw_data; - int *recursion; + int rctx; int size; int cpu; @@ -609,7 +610,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -634,7 +636,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } -- cgit v1.2.3 From ff038f5c37c2070829004a0678372766c2b32180 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:27:27 -0500 Subject: tracing: Create new TRACE_EVENT_TEMPLATE There are some places in the kernel that define several tracepoints and they are all identical besides the name. The code to enable, disable and record is created for every trace point even if most of the code is identical. This patch adds TRACE_EVENT_TEMPLATE that lets the developer create a template TRACE_EVENT and create trace points with DEFINE_EVENT, which is based off of a given template. Each trace point used by this will share most of the code, and bring down the size of the kernel when there are several duplicate events. Usage is: TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print); Which would be the same as defining a normal TRACE_EVENT. To create the trace events that the trace points will use: DEFINE_EVENT(template, name, proto, args) is done. The template is the name of the TRACE_EVENT_TEMPLATE to use. The name is the name of the trace point. The parameters proto and args must be the same as the proto and args of the template. If they are not the same, then a compile error will result. I tried hard removing this duplication but the C preprocessor is not powerful enough (or my CPP magic experience points is not at a high enough level) to not need them. A lot of trace events are coming in with new XFS development. Most of the trace points are identical except for the name. The following shows the advantage of having TRACE_EVENT_TEMPLATE: $ size fs/xfs/xfs.o.* text data bss dec hex filename 452114 2788 3520 458422 6feb6 fs/xfs/xfs.o.old 638482 38116 3744 680342 a6196 fs/xfs/xfs.o.template 996954 38116 4480 1039550 fdcbe fs/xfs/xfs.o.trace xfs.o.old is without any tracepoints. xfs.o.template uses the new TRACE_EVENT_TEMPLATE. xfs.o.trace uses the current TRACE_EVENT macros. Requested-by: Christoph Hellwig Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 4 ++ include/trace/define_trace.h | 6 ++ include/trace/ftrace.h | 149 +++++++++++++++++++++++++++++++------------ 3 files changed, 117 insertions(+), 42 deletions(-) (limited to 'include/trace') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2aac8a83e89..88a5b5a809e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,6 +280,10 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 2a4b3bf7403..244985814a4 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -31,6 +31,10 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -63,6 +67,8 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef TRACE_EVENT_TEMPLATE +#undef DEFINE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c3417c13e3e..2969f65d800 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,6 +18,26 @@ #include +/* + * TRACE_EVENT_TEMPLATE can be used to add a generic function + * handlers for events. That is, if all events have the same + * parameters and just have distinct trace points. + * Each tracepoint can be defined with DEFINE_EVENT and that + * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * + * TRACE_EVENT is a one to one mapping between tracepoint and template. + */ +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + TRACE_EVENT_TEMPLATE(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + + #undef __field #define __field(type, item) type item; @@ -36,13 +56,15 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - char __data[0]; \ - }; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name #undef __cpparg @@ -89,12 +111,15 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -170,8 +195,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -186,6 +211,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -255,10 +283,11 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +ftrace_raw_output_id_##call(int event_id, const char *name, \ + struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ @@ -268,7 +297,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ entry = iter->ent; \ \ - if (entry->type != event_##call.id) { \ + if (entry->type != event_id) { \ WARN_ON_ONCE(1); \ return TRACE_TYPE_UNHANDLED; \ } \ @@ -277,14 +306,25 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ p = &get_cpu_var(ftrace_event_seq); \ trace_seq_init(p); \ - ret = trace_seq_printf(s, #call ": " print); \ + ret = trace_seq_printf(s, "%s: ", name); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ +static enum print_line_t \ +ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ +{ \ + return ftrace_raw_output_id_##template(event_##name.id, \ + #name, iter, flags); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -318,8 +358,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -335,6 +375,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -361,10 +404,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ __data_size += (len) * sizeof(type); #undef __string -#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -376,6 +419,9 @@ static inline int ftrace_get_offsets_##call( \ return __data_size; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -397,19 +443,22 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ \ -static void ftrace_profile_##call(proto); \ +static void ftrace_profile_##name(proto); \ \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\ +static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\ { \ - return register_trace_##call(ftrace_profile_##call); \ + return register_trace_##name(ftrace_profile_##name); \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ +static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ { \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##name(ftrace_profile_##name); \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -550,15 +599,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ - \ -static struct ftrace_event_call event_##call; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_raw_event_##call(proto) \ +static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ struct ring_buffer *buffer; \ @@ -572,7 +619,7 @@ static void ftrace_raw_event_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(&buffer, \ - event_##call.id, \ + event_call->id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -587,6 +634,14 @@ static void ftrace_raw_event_##call(proto) \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(buffer, \ event, irq_flags, pc); \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + ftrace_raw_event_id_##template(&event_##call, args); \ } \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ @@ -630,8 +685,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ + .show_format = ftrace_format_##template, \ + .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -719,14 +774,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -static void ftrace_profile_##call(proto) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +static void \ +ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ extern int perf_swevent_get_recursion_context(void); \ extern void perf_swevent_put_recursion_context(int rctx); \ - struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ @@ -789,6 +845,15 @@ end_recursion: \ \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + \ + ftrace_profile_templ_##template(event_call, args); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.2.3 From e5bc9721684e9412f3e0465222f317c362a8ab47 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:36:26 -0500 Subject: tracing: Create new DEFINE_EVENT_PRINT After creating the TRACE_EVENT_TEMPLATE I started to look at other trace points to see what duplication was made. I noticed that there are several trace points where they are almost identical except for the name and the output format. Since TRACE_EVENT_TEMPLATE was successful in bringing down the size of trace events, I added a DEFINE_EVENT_PRINT. DEFINE_EVENT_PRINT is used just like DEFINE_EVENT is. That is, the DEFINE_EVENT_PRINT also uses a TRACE_EVENT_TEMPLATE, but it allows the developer to overwrite the print format. If there are two or more TRACE_EVENTS that are identical except for the name and print, then they can be converted to use a TRACE_EVENT_TEMPLATE. Since the TRACE_EVENT_TEMPLATE already does the print output, the first trace event would have its print format held in the TRACE_EVENT_TEMPLATE and be defined with a DEFINE_EVENT. The rest will use the DEFINE_EVENT_PRINT and override the print format. Converting the sched trace points to both DEFINE_EVENT and DEFINE_EVENT_PRINT. Five were converted to DEFINE_EVENT and two were converted to DEFINE_EVENT_PRINT. I was able to get the following: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is the scheduler compiled with no trace points. sched.o-templ is with the use of DEFINE_EVENT and DEFINE_EVENT_PRINT sched.o-trace is the current trace events. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 + include/trace/define_trace.h | 5 ++ include/trace/ftrace.h | 123 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 126 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88a5b5a809e..7063383cca1 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -283,6 +283,8 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 244985814a4..5d7d855ae21 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -35,6 +35,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -69,6 +73,7 @@ #undef TRACE_EVENT_FN #undef TRACE_EVENT_TEMPLATE #undef DEFINE_EVENT +#undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2969f65d800..b0461772bc8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -67,6 +67,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #undef __cpparg #define __cpparg(arg...) arg @@ -120,6 +124,10 @@ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -198,15 +206,28 @@ #undef TRACE_EVENT_TEMPLATE #define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_setup_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ \ tstruct; \ \ - trace_seq_printf(s, "\nprint fmt: " print); \ + return ret; \ +} \ + \ +static int \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##call(unused, s); \ + if (!ret) \ + return ret; \ + \ + ret = trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ } @@ -214,6 +235,23 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ +static int \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##template(unused, s); \ + if (!ret) \ + return ret; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -325,6 +363,38 @@ ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ #name, iter, flags); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ +static enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##template *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, "%s: ", #call); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -378,6 +448,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -422,6 +496,10 @@ static inline int ftrace_get_offsets_##call( \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -461,6 +539,10 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ unregister_trace_##name(ftrace_profile_##name); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif @@ -674,7 +756,19 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ -} \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -690,6 +784,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ _TRACE_PROFILE_INIT(call) \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##template, \ + _TRACE_PROFILE_INIT(call) \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -854,6 +965,10 @@ static void ftrace_profile_##call(proto) \ ftrace_profile_templ_##template(event_call, args); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.2.3 From 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:48:08 -0500 Subject: tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT Converting some of the scheduler trace events to use the TRACE_EVENT_TEMPLATE, DEFINE_EVENT and DEFINE_EVENT_PRINT helped to save some space: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is without any tracepoints compiled sched.o-templ is with this patch sched.o-trace is the tracepoints before this patch The trace events converted to DEFINE_EVENT: sched_wakeup, sched_wakeup_new, sched_process_free, sched_process_exit, and sched_stat_wait. The trace events converted to DEFINE_EVENT_PRINT: sched_stat_sleep and sched_stat_iowait. Note, since the TRACE_EVENT_TEMPLATE always uses a print, the sched_stat_wait print format is defined in the template and this template is used by sched_stat_sleep and sched_stat_iowait. But the later two override the print format. Signed-off-by: Steven Rostedt --- include/trace/events/sched.h | 170 +++++++++++++------------------------------ 1 file changed, 52 insertions(+), 118 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b50b9856c59..238f74b5848 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup, +TRACE_EVENT_TEMPLATE(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -110,38 +110,19 @@ TRACE_EVENT(sched_wakeup, __entry->success, __entry->target_cpu) ); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); + /* * Tracepoint for waking up a new task: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - __field( int, target_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - __entry->target_cpu = task_cpu(p); - ), - - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->target_cpu) -); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); /* * Tracepoint for task switches, performed by the scheduler: @@ -216,10 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, +TRACE_EVENT_TEMPLATE(sched_process_template, TP_PROTO(struct task_struct *p), @@ -242,29 +220,19 @@ TRACE_EVENT(sched_process_free, ); /* - * Tracepoint for a task exiting: + * Tracepoint for freeing a task: */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), +DEFINE_EVENT(sched_process_template, sched_process_free, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) -); +/* + * Tracepoint for a task exiting: + */ +DEFINE_EVENT(sched_process_template, sched_process_exit, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for a waiting task: @@ -348,12 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ - -/* - * Tracepoint for accounting wait time (time the task is runnable - * but not actually running due to scheduler contention). - */ -TRACE_EVENT(sched_stat_wait, +TRACE_EVENT_TEMPLATE(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), @@ -379,6 +342,37 @@ TRACE_EVENT(sched_stat_wait, (unsigned long long)__entry->delay) ); + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_wait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). @@ -412,66 +406,6 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); -/* - * Tracepoint for accounting sleep time (time the task is not runnable, - * including iowait, see below). - */ -TRACE_EVENT(sched_stat_sleep, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - -/* - * Tracepoint for accounting iowait time (time the task is not runnable - * due to waiting on IO to complete). - */ -TRACE_EVENT(sched_stat_iowait, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 091ad3658e3c76c5fb05f65bfb64a0246f8f31b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 09:04:55 +0100 Subject: events: Rename TRACE_EVENT_TEMPLATE() to DECLARE_EVENT_CLASS() It is not quite obvious at first sight what TRACE_EVENT_TEMPLATE does: does it define an event as well beyond defining a template? To clarify this, rename it to DECLARE_EVENT_CLASS, which follows the various 'DECLARE_*()' idioms we already have in the kernel: DECLARE_EVENT_CLASS(class) DEFINE_EVENT(class, event1) DEFINE_EVENT(class, event2) DEFINE_EVENT(class, event3) To complete this logic we should also rename TRACE_EVENT() to: DEFINE_SINGLE_EVENT(single_event) ... but in a more quiet moment of the kernel cycle. Cc: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/sched.h | 6 +++--- include/trace/ftrace.h | 46 ++++++++++++++++++++++---------------------- 4 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7063383cca1..f59604ed0ec 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,7 +280,7 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5d7d855ae21..5acfb1eb4df 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -71,7 +71,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN -#undef TRACE_EVENT_TEMPLATE +#undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 238f74b5848..5ce79502185 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT_TEMPLATE(sched_wakeup_template, +DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -197,7 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -TRACE_EVENT_TEMPLATE(sched_process_template, +DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), @@ -316,7 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ -TRACE_EVENT_TEMPLATE(sched_stat_template, +DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b0461772bc8..2c9c073e45a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -19,17 +19,17 @@ #include /* - * TRACE_EVENT_TEMPLATE can be used to add a generic function + * DECLARE_EVENT_CLASS can be used to add a generic function * handlers for events. That is, if all events have the same * parameters and just have distinct trace points. * Each tracepoint can be defined with DEFINE_EVENT and that - * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * will map the DECLARE_EVENT_CLASS to the tracepoint. * * TRACE_EVENT is a one to one mapping between tracepoint and template. */ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - TRACE_EVENT_TEMPLATE(name, \ + DECLARE_EVENT_CLASS(name, \ PARAMS(proto), \ PARAMS(args), \ PARAMS(tstruct), \ @@ -56,8 +56,8 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ @@ -115,8 +115,8 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; @@ -203,8 +203,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_setup_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -321,8 +321,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ ftrace_raw_output_id_##call(int event_id, const char *name, \ struct trace_iterator *iter, int flags) \ @@ -428,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -480,8 +480,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __string #define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -521,8 +521,8 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ @@ -681,8 +681,8 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ proto) \ @@ -764,8 +764,8 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -885,8 +885,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static void \ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ -- cgit v1.2.3 From 925684d6d589e40e41007edf47c69e729d911263 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:03:23 +0800 Subject: tracing: Convert module refcnt events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 29854 1980 128 31962 7cda kernel/module.o.old 28750 1980 128 30858 788a kernel/module.o Two events are converted: module_refcnt: module_get, module_put No change in functionality. Signed-off-by: Li Zefan Cc: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E283B.3010508@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/module.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 84160fb1847..4b0f48ba16a 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,7 +51,7 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); -TRACE_EVENT(module_get, +DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), @@ -73,26 +73,18 @@ TRACE_EVENT(module_get, __get_str(name), (void *)__entry->ip, __entry->refcnt) ); -TRACE_EVENT(module_put, +DEFINE_EVENT(module_refcnt, module_get, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip, refcnt) +); - TP_STRUCT__entry( - __field( unsigned long, ip ) - __field( int, refcnt ) - __string( name, mod->name ) - ), +DEFINE_EVENT(module_refcnt, module_put, - TP_fast_assign( - __entry->ip = ip; - __entry->refcnt = refcnt; - __assign_str(name, mod->name); - ), + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_printk("%s call_site=%pf refcnt=%d", - __get_str(name), (void *)__entry->ip, __entry->refcnt) + TP_ARGS(mod, ip, refcnt) ); TRACE_EVENT(module_request, -- cgit v1.2.3 From 53d0422c2d10808fddb2c30859193bfea164c7e3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:10 +0800 Subject: tracing: Convert some kmem events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 333987 69800 27228 431015 693a7 mm/built-in.o.old 330030 69800 27228 427058 68432 mm/built-in.o 8 events are converted: kmem_alloc: kmalloc, kmem_cache_alloc kmem_alloc_node: kmalloc_node, kmem_cache_alloc_node kmem_free: kfree, kmem_cache_free mm_page: mm_page_alloc_zone_locked, mm_page_pcpu_drain No change in functionality. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mel Gorman LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/kmem.h | 130 ++++++++++++++------------------------------ mm/page_alloc.c | 4 +- mm/util.c | 3 - 3 files changed, 43 insertions(+), 94 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eaf46bdd18a..3adca0ca9db 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -44,7 +44,7 @@ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ ) : "GFP_NOWAIT" -TRACE_EVENT(kmalloc, +DECLARE_EVENT_CLASS(kmem_alloc, TP_PROTO(unsigned long call_site, const void *ptr, @@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(kmem_cache_alloc, +DEFINE_EVENT(kmem_alloc, kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), +DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags)) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); -TRACE_EVENT(kmalloc_node, +DECLARE_EVENT_CLASS(kmem_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, @@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node, __entry->node) ); -TRACE_EVENT(kmem_cache_alloc_node, +DEFINE_EVENT(kmem_alloc_node, kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), +DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags), - __entry->node) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); -TRACE_EVENT(kfree, +DECLARE_EVENT_CLASS(kmem_free, TP_PROTO(unsigned long call_site, const void *ptr), @@ -207,23 +169,18 @@ TRACE_EVENT(kfree, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); -TRACE_EVENT(kmem_cache_free, +DEFINE_EVENT(kmem_free, kfree, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr), + TP_ARGS(call_site, ptr) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), +DEFINE_EVENT(kmem_free, kmem_cache_free, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), + TP_PROTO(unsigned long call_site, const void *ptr), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_ARGS(call_site, ptr) ); TRACE_EVENT(mm_page_free_direct, @@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(mm_page_alloc_zone_locked, +DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype), @@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked, __entry->order == 0) ); -TRACE_EVENT(mm_page_pcpu_drain, +DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, - TP_PROTO(struct page *page, int order, int migratetype), + TP_PROTO(struct page *page, unsigned int order, int migratetype), - TP_ARGS(page, order, migratetype), + TP_ARGS(page, order, migratetype) +); - TP_STRUCT__entry( - __field( struct page *, page ) - __field( int, order ) - __field( int, migratetype ) - ), +DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, - TP_fast_assign( - __entry->page = page; - __entry->order = order; - __entry->migratetype = migratetype; - ), + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), TP_printk("page=%p pfn=%lu order=%d migratetype=%d", - __entry->page, - page_to_pfn(__entry->page), - __entry->order, - __entry->migratetype) + __entry->page, page_to_pfn(__entry->page), + __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bc2ac63f41..bdb22f55d00 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -48,12 +48,14 @@ #include #include #include -#include #include #include #include "internal.h" +#define CREATE_TRACE_POINTS +#include + /* * Array of node states. */ diff --git a/mm/util.c b/mm/util.c index 7c35ad95f92..15d197571b4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,9 +6,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate -- cgit v1.2.3 From c467307c1a812c3150b27a68c2b2d3397bb40a4f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:31 +0800 Subject: tracing: Convert softirq events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 12781 952 36 13769 35c9 kernel/softirq.o.old 11981 952 32 12965 32a5 kernel/softirq.o Two events are converted: softirq: softirq_entry, softirq_exit No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E287F.4030708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/irq.h | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index dcfcd440762..0e4cfb694fe 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -82,18 +82,7 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -/** - * softirq_entry - called immediately before the softirq handler - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array - * - * The @h parameter, contains a pointer to the struct softirq_action - * which has a pointer to the action handler that is called. By subtracting - * the @vec pointer from the @h pointer, we can determine the softirq - * number. Also, when used in combination with the softirq_exit tracepoint - * we can determine the softirq latency. - */ -TRACE_EVENT(softirq_entry, +DECLARE_EVENT_CLASS(softirq, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -111,6 +100,24 @@ TRACE_EVENT(softirq_entry, show_softirq_name(__entry->vec)) ); +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ +DEFINE_EVENT(softirq, softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + /** * softirq_exit - called immediately after the softirq handler returns * @h: pointer to struct softirq_action @@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry, * combination with the softirq_entry tracepoint we can determine the softirq * latency. */ -TRACE_EVENT(softirq_exit, +DEFINE_EVENT(softirq, softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - - TP_STRUCT__entry( - __field( int, vec ) - ), - - TP_fast_assign( - __entry->vec = (int)(h - vec); - ), - - TP_printk("vec=%d [action=%s]", __entry->vec, - show_softirq_name(__entry->vec)) + TP_ARGS(h, vec) ); #endif /* _TRACE_IRQ_H */ -- cgit v1.2.3 From 382ece710bf88b08440b598731361e5a47582b62 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:03 +0800 Subject: tracing: Convert some workqueue events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 13171 800 72 14043 36db kernel/workqueue.o.old 12243 800 68 13111 3337 kernel/workqueue.o Two events are converted: workqueue: workqueue_insertion, workqueue_execution No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E289F.5010104@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/workqueue.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index e4612dbd7ba..d6c974474e7 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(workqueue_insertion, +DECLARE_EVENT_CLASS(workqueue, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), @@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion, __entry->thread_pid, __entry->func) ); -TRACE_EVENT(workqueue_execution, +DEFINE_EVENT(workqueue, workqueue_insertion, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work), + TP_ARGS(wq_thread, work) +); - TP_STRUCT__entry( - __array(char, thread_comm, TASK_COMM_LEN) - __field(pid_t, thread_pid) - __field(work_func_t, func) - ), +DEFINE_EVENT(workqueue, workqueue_execution, - TP_fast_assign( - memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); - __entry->thread_pid = wq_thread->pid; - __entry->func = work->func; - ), + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, - __entry->thread_pid, __entry->func) + TP_ARGS(wq_thread, work) ); /* Trace the creation of one workqueue thread on a cpu */ -- cgit v1.2.3 From 7703466b4c0a21b88d701882bef0d45bcb0a0281 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:38 +0800 Subject: tracing: Convert some power events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 4312 524 12 4848 12f0 kernel/trace/power-traces.o.old 3455 524 8 3987 f93 kernel/trace/power-traces.o Two events are converted: power: power_start, power_frequency No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: <4B0E28C2.1090906@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/power.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9bb96e5a284..c4efe9b8280 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,7 +16,7 @@ enum { }; #endif -TRACE_EVENT(power_start, +DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state), @@ -35,42 +35,36 @@ TRACE_EVENT(power_start, TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) ); -TRACE_EVENT(power_end, - - TP_PROTO(int dummy), +DEFINE_EVENT(power, power_start, - TP_ARGS(dummy), + TP_PROTO(unsigned int type, unsigned int state), - TP_STRUCT__entry( - __field( u64, dummy ) - ), + TP_ARGS(type, state) +); - TP_fast_assign( - __entry->dummy = 0xffff; - ), +DEFINE_EVENT(power, power_frequency, - TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + TP_PROTO(unsigned int type, unsigned int state), + TP_ARGS(type, state) ); +TRACE_EVENT(power_end, -TRACE_EVENT(power_frequency, - - TP_PROTO(unsigned int type, unsigned int state), + TP_PROTO(int dummy), - TP_ARGS(type, state), + TP_ARGS(dummy), TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) + __field( u64, dummy ) ), TP_fast_assign( - __entry->type = type; - __entry->state = state; + __entry->dummy = 0xffff; ), - TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + ); #endif /* _TRACE_POWER_H */ -- cgit v1.2.3 From 77ca1e0294f25fc26053ba14353e703158acef26 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:14 +0800 Subject: tracing: Convert some block events to DEFINE_EVENT use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 53570 3284 184 57038 dece block/blk-core.o.old 43702 3284 144 47130 b81a block/blk-core.o 12 events are converted: block_rq: block_rq_insert, block_rq_issue block_rq_with_error: block_rq_{abort, requeue, complete} block_bio: block_bio_{backmerge, frontmerge, queue} block_get_rq: block_getrq, block_sleeprq block_unplug: block_unplug_timer, block_unplug_io No change in functionality. Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E28E6.7060609@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/block.h | 202 +++++++++---------------------------------- 1 file changed, 42 insertions(+), 160 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 00405b5f624..5fb72733331 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(block_rq_abort, +DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort, __entry->nr_sector, __entry->errors) ); -TRACE_EVENT(block_rq_insert, +DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(q, rq) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), +DEFINE_EVENT(block_rq_with_error, block_rq_requeue, - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->bytes = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0; + TP_PROTO(struct request_queue *q, struct request *rq), - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_ARGS(q, rq) +); - TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __entry->bytes, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_rq_with_error, block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_issue, +DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), @@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue, __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) + __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_rq_requeue, +DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_complete, +DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); TRACE_EVENT(block_bio_bounce, @@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -TRACE_EVENT(block_bio_backmerge, +DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_bio_frontmerge, +DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_bio_queue, +DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, bio) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_bio, block_bio_queue, - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_getrq, +DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_sleeprq, +DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw), + TP_ARGS(q, bio, rw) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_get_rq, block_sleeprq, - TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio ? bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_rw : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio, rw) ); TRACE_EVENT(block_plug, @@ -387,7 +274,7 @@ TRACE_EVENT(block_plug, TP_printk("[%s]", __entry->comm) ); -TRACE_EVENT(block_unplug_timer, +DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q), @@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -TRACE_EVENT(block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug_timer, TP_PROTO(struct request_queue *q), - TP_ARGS(q), + TP_ARGS(q) +); - TP_STRUCT__entry( - __field( int, nr_rq ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_unplug, block_unplug_io, - TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q), - TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) + TP_ARGS(q) ); TRACE_EVENT(block_split, -- cgit v1.2.3 From 071688f36e7eba3e37b2fc48e35bfdab99b80b4d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:55 +0800 Subject: tracing: Convert some jbd2 events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 34903 1693 448 37044 90b4 fs/jbd2/journal.o.old 31931 1693 416 34040 84f8 fs/jbd2/journal.o Four events are converted: jbd2_commit: jbd2_start_commit, jbd2_commit_{locking, flushing, logging} No change in functionality. Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E290F.7030909@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/jbd2.h | 63 ++++++++------------------------------------- 1 file changed, 11 insertions(+), 52 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 3c60b75adb9..96b370a050d 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint, jbd2_dev_to_name(__entry->dev), __entry->result) ); -TRACE_EVENT(jbd2_start_commit, +DECLARE_EVENT_CLASS(jbd2_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), @@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit, __entry->sync_commit) ); -TRACE_EVENT(jbd2_commit_locking, +DEFINE_EVENT(jbd2_commit, jbd2_start_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_flushing, +DEFINE_EVENT(jbd2_commit, jbd2_commit_locking, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_logging, +DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), + TP_ARGS(journal, commit_transaction) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), +DEFINE_EVENT(jbd2_commit, jbd2_commit_logging, - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); TRACE_EVENT(jbd2_end_commit, -- cgit v1.2.3 From b5eb34c3592545c756e50d882c08417eb60740a7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:07:36 +0800 Subject: tracing: Convert some ext4 events to DEFINE_TRACE Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 294695 6104 340 301139 49853 fs/ext4/ext4.o.old 289983 6104 324 296411 485db fs/ext4/ext4.o 5 events are convertd: ext4__write_begin: ext4_write_begin, ext4_da_write_begin ext4__write_end: ext4_{ordered, writeback, journalled}_write_end No change in functionality. Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2938.2040708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/ext4.h | 129 ++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 94 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d09550bf3f9..318f76535bd 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode, (unsigned long) __entry->dir, __entry->mode) ); -TRACE_EVENT(ext4_write_begin, +DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), @@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin, __entry->pos, __entry->len, __entry->flags) ); -TRACE_EVENT(ext4_ordered_write_end, +DEFINE_EVENT(ext4__write_begin, ext4_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), @@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end, __entry->pos, __entry->len, __entry->copied) ); -TRACE_EVENT(ext4_writeback_write_end, +DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), + TP_ARGS(inode, pos, len, copied) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), +DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end, - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_journalled_write_end, +DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), + TP_ARGS(inode, pos, len, copied) +); - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), +DEFINE_EVENT(ext4__write_end, ext4_da_write_end, - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepage, @@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result, (unsigned long) __entry->writeback_index) ); -TRACE_EVENT(ext4_da_write_begin, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int flags), - - TP_ARGS(inode, pos, len, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->flags = flags; - ), - - TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->flags) -); - -TRACE_EVENT(ext4_da_write_end, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), - - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) -); - TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v1.2.3 From 470dda7417f284b9cfc96560b2acd98df63798a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:08:01 +0800 Subject: tracing: Restore original format of sched events The original format for sched_stat_iowait and sched_stat_sleep: $ cat events/sched/sched_stat_iowait/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... But commit commit 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 ("tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT") broke the format: $ cat events/sched/sched_stat_iowait/format print fmt: "task: %s:%d iowait: %Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format print fmt: "task: %s:%d sleep: %Lu [ns]", ... No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2951.9050800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5ce79502185..9d316b22388 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -355,23 +355,17 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait, * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d sleep: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d iowait: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting runtime (time the task is executing -- cgit v1.2.3 From d1eb650ff4130972fa21462fa49cd35a2865403b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:45 -0500 Subject: tracepoint: Move signal sending tracepoint to events/signal.h Move signal sending event to events/signal.h. This patch also renames sched_signal_send event to signal_generate. Changes in v4: - Fix a typo of task_struct pointer. Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Add siginfo storing macro Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215645.30449.60208.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/tracepoint.tmpl | 5 +++ include/trace/events/sched.h | 25 ------------- include/trace/events/signal.h | 66 +++++++++++++++++++++++++++++++++++ kernel/signal.c | 5 +-- 4 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 include/trace/events/signal.h (limited to 'include/trace') diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index b0756d0fd57..8bca1d5cec0 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -86,4 +86,9 @@ !Iinclude/trace/events/irq.h + + SIGNAL +!Iinclude/trace/events/signal.h + + diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9d316b22388..cfceb0b73e2 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -287,31 +287,6 @@ TRACE_EVENT(sched_process_fork, __entry->child_comm, __entry->child_pid) ); -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig=%d comm=%s pid=%d", - __entry->sig, __entry->comm, __entry->pid) -); - /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h new file mode 100644 index 00000000000..ef51756a801 --- /dev/null +++ b/include/trace/events/signal.h @@ -0,0 +1,66 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM signal + +#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SIGNAL_H + +#include +#include +#include + +#define TP_STORE_SIGINFO(__entry, info) \ + do { \ + if (info == SEND_SIG_NOINFO) { \ + __entry->errno = 0; \ + __entry->code = SI_USER; \ + } else if (info == SEND_SIG_PRIV) { \ + __entry->errno = 0; \ + __entry->code = SI_KERNEL; \ + } else { \ + __entry->errno = info->si_errno; \ + __entry->code = info->si_code; \ + } \ + } while (0) + +/** + * signal_generate - called when a signal is generated + * @sig: signal number + * @info: pointer to struct siginfo + * @task: pointer to struct task_struct + * + * Current process sends a 'sig' signal to 'task' process with + * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, + * 'info' is not a pointer and you can't access its field. Instead, + * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV + * means that si_code is SI_KERNEL. + */ +TRACE_EVENT(signal_generate, + + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + + TP_ARGS(sig, info, task), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + ), + + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + __entry->sig, __entry->errno, __entry->code, + __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SIGNAL_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/signal.c b/kernel/signal.c index 6705320784f..a1e0cc6b32c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,8 @@ #include #include #include -#include +#define CREATE_TRACE_POINTS +#include #include #include @@ -834,7 +835,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, struct sigqueue *q; int override_rlimit; - trace_sched_signal_send(sig, t); + trace_signal_generate(sig, info, t); assert_spin_locked(&t->sighand->siglock); -- cgit v1.2.3 From f9d4257e01d266e67420cc99d456b6d4c8464f54 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:51 -0500 Subject: tracepoint: Add signal deliver event Add a tracepoint where a process gets a signal. This tracepoint shows signal-number, sa-handler and sa-flag. Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Fix comment Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215651.30449.20926.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 39 +++++++++++++++++++++++++++++++++++++++ kernel/signal.c | 3 +++ 2 files changed, 42 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index ef51756a801..a6d71de0dc0 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -60,6 +60,45 @@ TRACE_EVENT(signal_generate, __entry->comm, __entry->pid) ); +/** + * signal_deliver - called when a signal is delivered + * @sig: signal number + * @info: pointer to struct siginfo + * @ka: pointer to struct k_sigaction + * + * A 'sig' signal is delivered to current process with 'info' siginfo, + * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or + * SIG_DFL. + * Note that some signals reported by signal_generate tracepoint can be + * lost, ignored or modified (by debugger) before hitting this tracepoint. + * This means, this can show which signals are actually delivered, but + * matching generated signals and delivered signals may not be correct. + */ +TRACE_EVENT(signal_deliver, + + TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka), + + TP_ARGS(sig, info, ka), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __field( unsigned long, sa_handler ) + __field( unsigned long, sa_flags ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + __entry->sa_handler = (unsigned long)ka->sa.sa_handler; + __entry->sa_flags = ka->sa.sa_flags; + ), + + TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", + __entry->sig, __entry->errno, __entry->code, + __entry->sa_handler, __entry->sa_flags) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index a1e0cc6b32c..349d4493740 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1840,6 +1840,9 @@ relock: ka = &sighand->action[signr-1]; } + /* Trace actually delivered signals. */ + trace_signal_deliver(signr, info, ka); + if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { -- cgit v1.2.3 From ba005e1f417295d28cd1563ab82bc33af07fb16a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:58 -0500 Subject: tracepoint: Add signal loss events Add signal_overflow_fail and signal_lose_info tracepoints for signal-lost events. Changes in v3: - Add docbook style comments Changes in v2: - Use siginfo string macro Suggested-by: Roland McGrath Reviewed-by: Jason Baron Signed-off-by: Masami Hiramatsu Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215658.30449.9934.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 68 +++++++++++++++++++++++++++++++++++++++++++ kernel/signal.c | 19 ++++++++---- 2 files changed, 82 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index a6d71de0dc0..a510b75ac30 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -99,6 +99,74 @@ TRACE_EVENT(signal_deliver, __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); + +/** + * signal_overflow_fail - called when signal queue is overflow + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel fails to generate 'sig' signal with 'info' siginfo, because + * siginfo queue is overflow, and the signal is dropped. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of RT signals. + */ +TRACE_EVENT(signal_overflow_fail, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); + +/** + * signal_lose_info - called when siginfo is lost + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo + * queue is overflow. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of non-RT signals. + */ +TRACE_EVENT(signal_lose_info, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index 349d4493740..93e72e5feae 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -897,12 +897,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, break; } } else if (!is_si_special(info)) { - if (sig >= SIGRTMIN && info->si_code != SI_USER) - /* - * Queue overflow, abort. We may abort if the signal was rt - * and sent by user using something other than kill(). - */ + if (sig >= SIGRTMIN && info->si_code != SI_USER) { + /* + * Queue overflow, abort. We may abort if the + * signal was rt and sent by user using something + * other than kill(). + */ + trace_signal_overflow_fail(sig, group, info); return -EAGAIN; + } else { + /* + * This is a silent loss of information. We still + * send the signal, but the *info bits are lost. + */ + trace_signal_lose_info(sig, group, info); + } } out_set: -- cgit v1.2.3 From bf56a4ea9f1683c5b223fd3a5dbea23f1fa91c34 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:20 +0800 Subject: trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit fix event_enter_##sname->event fix event_exit_##sname->event remove unused event_syscall_enter and event_syscall_exit Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D278.4090209@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 2 -- kernel/trace/trace_syscalls.c | 8 -------- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b50974a93af..2f7c539ab96 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -178,7 +178,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .system = "syscalls", \ - .event = &event_syscall_enter, \ + .event = &enter_syscall_print_##sname, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ @@ -214,7 +214,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .system = "syscalls", \ - .event = &event_syscall_exit, \ + .event = &exit_syscall_print_##sname, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 51ee17d3632..5f8827c92db 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -37,8 +37,6 @@ extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); -extern struct trace_event event_syscall_enter; -extern struct trace_event event_syscall_exit; extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 63aa8070365..00d6e176f5b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -444,14 +444,6 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -struct trace_event event_syscall_enter = { - .trace = print_syscall_enter, -}; - -struct trace_event event_syscall_exit = { - .trace = print_syscall_exit, -}; - int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v1.2.3 From 31c16b13349970b2684248c7d8608d2a96ae135d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:30 +0800 Subject: trace_syscalls: Set event_enter_##sname->data to its metadata Set event_enter_##sname->data to its metadata, it makes codes simpler. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D282.7050709@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 6 ++++-- include/trace/syscall.h | 2 +- kernel/trace/trace_syscalls.c | 36 +++++++++++------------------------- 3 files changed, 16 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2f7c539ab96..d3c9fd01a11 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -153,6 +153,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) #define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ @@ -184,11 +185,12 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ @@ -220,7 +222,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5f8827c92db..c5265c81c4e 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,7 +34,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(char *name); +extern int syscall_name_to_nr(const char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 00d6e176f5b..39649b1675d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(char *name) +int syscall_name_to_nr(const char *name) { int i; @@ -172,18 +172,11 @@ extern char *__bad_type_size(void); int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { int i; - int nr; int ret; - struct syscall_metadata *entry; + struct syscall_metadata *entry = call->data; struct syscall_trace_enter trace; int offset = offsetof(struct syscall_trace_enter, args); - nr = syscall_name_to_nr(call->data); - entry = syscall_nr_to_meta(nr); - - if (!entry) - return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); @@ -245,18 +238,11 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; - struct syscall_metadata *meta; + struct syscall_metadata *meta = call->data; int ret; - int nr; int i; int offset = offsetof(typeof(trace), args); - nr = syscall_name_to_nr(call->data); - meta = syscall_nr_to_meta(nr); - - if (!meta) - return 0; - ret = trace_define_common_fields(call); if (ret) return ret; @@ -366,9 +352,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -389,9 +375,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; @@ -407,9 +393,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -430,9 +416,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; -- cgit v1.2.3 From fcc19438dda38dacc8c144e2db3ebc6b9fd4f8b8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:36 +0800 Subject: trace_syscalls: Remove enter_id exit_id use ->enter_event->id instead of ->enter_id use ->exit_event->id instead of ->exit_id Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D288.7030001@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 2 -- include/trace/syscall.h | 6 ------ kernel/trace/trace_syscalls.c | 30 ++++++++++-------------------- 3 files changed, 10 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d3c9fd01a11..b9af87560ad 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -168,7 +168,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_enter_##sname.id = id; \ - set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ return 0; \ } \ @@ -205,7 +204,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_exit_##sname.id = id; \ - set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ return 0; \ } \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index c5265c81c4e..ca09561cd57 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -15,8 +15,6 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) - * @enter_id: associated ftrace enter event id - * @exit_id: associated ftrace exit event id * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ @@ -25,8 +23,6 @@ struct syscall_metadata { int nb_args; const char **types; const char **args; - int enter_id; - int exit_id; struct ftrace_event_call *enter_event; struct ftrace_event_call *exit_event; @@ -35,8 +31,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(const char *name); -void set_syscall_enter_id(int num, int id); -void set_syscall_exit_id(int num, int id); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 39649b1675d..27eb18d6922 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -67,16 +67,6 @@ int syscall_name_to_nr(const char *name) return -1; } -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) -{ - syscalls_metadata[num]->exit_id = id; -} - enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -93,7 +83,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags) if (!entry) goto end; - if (entry->enter_id != ent->type) { + if (entry->enter_event->id != ent->type) { WARN_ON_ONCE(1); goto end; } @@ -148,7 +138,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } - if (entry->exit_id != ent->type) { + if (entry->exit_event->id != ent->type) { WARN_ON_ONCE(1); return TRACE_TYPE_UNHANDLED; } @@ -302,8 +292,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, - size, 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->enter_event->id, size, 0, 0); if (!event) return; @@ -334,8 +324,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, - sizeof(*entry), 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->exit_event->id, sizeof(*entry), 0, 0); if (!event) return; @@ -510,11 +500,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec = (struct syscall_trace_enter *) raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; + rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_id, 0, 1, rec, size); + perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); @@ -615,11 +605,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec = (struct syscall_trace_exit *)raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_id; + rec->ent.type = sys_data->exit_event->id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_id, 0, 1, rec, size); + perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); -- cgit v1.2.3 From c252f65793874b56d50395ab604db465ce688665 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:47 +0800 Subject: trace_syscalls: Add syscall_nr field to struct syscall_metadata Add syscall_nr field to struct syscall_metadata, it helps us to get syscall number easier. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D293.6090800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 3 ++- kernel/trace/trace_syscalls.c | 22 +++++++++------------- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b9af87560ad..3c280d7ecb7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -161,7 +161,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&enter_syscall_print_##sname);\ @@ -197,7 +197,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&exit_syscall_print_##sname);\ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index ca09561cd57..1531eef3071 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -12,6 +12,7 @@ * A syscall entry in the ftrace syscalls array. * * @name: name of the syscall + * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) @@ -20,6 +21,7 @@ */ struct syscall_metadata { const char *name; + int syscall_nr; int nb_args; const char **types; const char **args; @@ -30,7 +32,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(const char *name); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 27eb18d6922..144cc14d855 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(const char *name) +static int syscall_name_to_nr(const char *name) { int i; @@ -342,10 +342,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -365,10 +363,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -383,10 +379,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -406,10 +400,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -436,6 +428,10 @@ int __init init_ftrace_syscalls(void) for (i = 0; i < NR_syscalls; i++) { addr = arch_syscall_addr(i); meta = find_syscall_meta(addr); + if (!meta) + continue; + + meta->syscall_nr = i; syscalls_metadata[i] = meta; } -- cgit v1.2.3 From a1301da0997bf73c44dbe584e9070a13adc89672 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:55 +0800 Subject: trace_syscalls: Remove duplicate init_enter_##sname() use only one init_syscall_trace instead of many init_enter_##sname()/init_exit_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D29B.6090708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 30 ++---------------------------- include/trace/syscall.h | 1 + kernel/trace/trace_syscalls.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3c280d7ecb7..cf0d923ea40 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -158,19 +158,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&enter_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_enter_##sname.id = id; \ - INIT_LIST_HEAD(&event_enter_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -179,7 +166,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_enter_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -194,19 +181,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&exit_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_exit_##sname.id = id; \ - INIT_LIST_HEAD(&event_exit_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -215,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_exit_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 1531eef3071..dff9371e527 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,6 +32,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); +extern int init_syscall_trace(struct ftrace_event_call *call); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 144cc14d855..c6514093c95 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -412,6 +412,18 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } +int init_syscall_trace(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + return 0; +} + int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v1.2.3 From 3bbe84e9d385205d638035ee9dcc4db1b486ea08 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:24:01 +0800 Subject: trace_syscalls: Simplify syscall profile use only one prof_sysenter_enable() instead of prof_sysenter_enable_##sname() use only one prof_sysenter_disable() instead of prof_sysenter_disable_##sname() use only one prof_sysexit_enable() instead of prof_sysexit_enable_##sname() use only one prof_sysexit_disable() instead of prof_sysexit_disable_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D2A1.8060304@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 31 ++++--------------------------- include/trace/syscall.h | 8 ++++---- kernel/trace/trace_syscalls.c | 24 ++++++++---------------- 3 files changed, 16 insertions(+), 47 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cf0d923ea40..c2df3a59323 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,37 +99,16 @@ struct perf_event_attr; #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) #ifdef CONFIG_EVENT_PROFILE -#define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_enter("sys"#sname); \ -} \ - \ -static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_enter("sys"#sname); \ -} - -#define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_exit("sys"#sname); \ -} \ - \ -static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_exit("sys"#sname); \ -} #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysenter_enable_##sname, \ - .profile_disable = prof_sysenter_disable_##sname, + .profile_enable = prof_sysenter_enable, \ + .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysexit_enable_##sname, \ - .profile_disable = prof_sysexit_disable_##sname, + .profile_enable = prof_sysexit_enable, \ + .profile_disable = prof_sysexit_disable, #else #define TRACE_SYS_ENTER_PROFILE(sname) #define TRACE_SYS_ENTER_PROFILE_INIT(sname) @@ -158,7 +137,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -181,7 +159,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index dff9371e527..961fda3556b 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -50,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #ifdef CONFIG_EVENT_PROFILE -int reg_prof_syscall_enter(char *name); -void unreg_prof_syscall_enter(char *name); -int reg_prof_syscall_exit(char *name); -void unreg_prof_syscall_exit(char *name); +int prof_sysenter_enable(struct ftrace_event_call *call); +void prof_sysenter_disable(struct ftrace_event_call *call); +int prof_sysexit_enable(struct ftrace_event_call *call); +void prof_sysexit_disable(struct ftrace_event_call *call); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c6514093c95..1e85b6cc26a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -520,14 +520,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_enter(char *name) +int prof_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_enter) @@ -543,13 +541,11 @@ int reg_prof_syscall_enter(char *name) return ret; } -void unreg_prof_syscall_enter(char *name) +void prof_sysenter_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_enter--; @@ -625,14 +621,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_exit(char *name) +int prof_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_exit) @@ -648,13 +642,11 @@ int reg_prof_syscall_exit(char *name) return ret; } -void unreg_prof_syscall_exit(char *name) +void prof_sysexit_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_exit--; -- cgit v1.2.3