From 2e2ca155cd2213b4f398031180fb3d399d5b7db9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2008 12:26:40 -0400 Subject: ftrace: new continue entry - separate out from trace_entry Some tracers will need to work with more than one entry. In order to do this the trace_entry structure was split into two fields. One for the start of all entries, and one to continue an existing entry. The trace_entry structure now has a "field" entry that consists of the previous content of the trace_entry, and a "cont" entry that is just a string buffer the size of the "field" entry. Thanks to Andrew Morton for suggesting this idea. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f69f86788c2..6ddd6a6556c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -61,13 +61,12 @@ struct stack_entry { }; /* - * The trace entry - the most basic unit of tracing. This is what + * The trace field - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: * * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ -struct trace_entry { - char type; +struct trace_field { char cpu; char flags; char preempt_count; @@ -83,6 +82,18 @@ struct trace_entry { }; }; +struct trace_field_cont { + char buf[sizeof(struct trace_field)]; +}; + +struct trace_entry { + char type; + union { + struct trace_field field; + struct trace_field_cont cont; + }; +}; + #define TRACE_ENTRY_SIZE sizeof(struct trace_entry) /* -- cgit v1.2.3 From dd0e545f061f90099a3dcc13aa77e29c6295cf23 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2008 12:26:41 -0400 Subject: ftrace: printk formatting infrastructure This patch adds a feature that can help kernel developers debug their code using ftrace. int ftrace_printk(const char *fmt, ...); This records into the ftrace buffer using printf formatting. The entry size in the buffers are still a fixed length. A new type has been added that allows for more entries to be used for a single recording. The start of the print is still the same as the other entries. It returns the number of characters written to the ftrace buffer. For example: Having a module with the following code: static int __init ftrace_print_test(void) { ftrace_printk("jiffies are %ld\n", jiffies); return 0; } Gives me: insmod-5441 3...1 7569us : ftrace_print_test: jiffies are 4296626666 for the latency_trace file and: insmod-5441 [03] 1959.370498: ftrace_print_test jiffies are 4296626666 for the trace file. Note: Only the infrastructure should go into the kernel. It is to help facilitate debugging for other kernel developers. Calls to ftrace_printk is not intended to be left in the kernel, and should be frowned upon just like scattering printks around in the code. But having this easily at your fingertips helps the debugging go faster and bugs be solved quicker. Maybe later on, we can hook this with markers and have their printf format be sucked into ftrace output. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 10 ++ kernel/trace/trace.c | 273 +++++++++++++++++++++++++++++++++++++----- kernel/trace/trace.h | 11 ++ kernel/trace/trace_selftest.c | 11 +- 4 files changed, 272 insertions(+), 33 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6b232a2460c..f53b975e32f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -157,9 +157,18 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +# define ftrace_printk(x...) __ftrace_printk(_THIS_IP_, x) +extern int +__ftrace_printk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); #else static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +static inline int +ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))) +{ + return 0; +} #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD @@ -173,4 +182,5 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } static inline void ftrace_release(void *start, unsigned long size) { } #endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 76dfe6d2466..a917bea8271 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -197,12 +197,14 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) * NEED_RESCED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler + * CONT - multiple entries hold the trace item */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_NEED_RESCHED = 0x02, TRACE_FLAG_HARDIRQ = 0x04, TRACE_FLAG_SOFTIRQ = 0x08, + TRACE_FLAG_CONT = 0x10, }; /* @@ -1074,6 +1076,7 @@ enum trace_file_type { TRACE_FILE_LAT_FMT = 1, }; +/* Return the current entry. */ static struct trace_entry * trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, struct trace_iterator *iter, int cpu) @@ -1104,8 +1107,58 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, return &array[iter->next_page_idx[cpu]]; } +/* Increment the index counter of an iterator by one */ +static void trace_iterator_increment(struct trace_iterator *iter, int cpu) +{ + iter->idx++; + iter->next_idx[cpu]++; + iter->next_page_idx[cpu]++; + + if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) { + struct trace_array_cpu *data = iter->tr->data[cpu]; + + iter->next_page_idx[cpu] = 0; + iter->next_page[cpu] = + trace_next_list(data, iter->next_page[cpu]); + } +} + static struct trace_entry * -find_next_entry(struct trace_iterator *iter, int *ent_cpu) +trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data, + struct trace_iterator *iter, int cpu) +{ + struct list_head *next_page; + struct trace_entry *ent; + int idx, next_idx, next_page_idx; + + ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); + + if (likely(!ent || ent->type != TRACE_CONT)) + return ent; + + /* save the iterator details */ + idx = iter->idx; + next_idx = iter->next_idx[cpu]; + next_page_idx = iter->next_page_idx[cpu]; + next_page = iter->next_page[cpu]; + + /* find a real entry */ + do { + trace_iterator_increment(iter, cpu); + ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); + } while (ent && ent->type != TRACE_CONT); + + /* reset the iterator */ + iter->idx = idx; + iter->next_idx[cpu] = next_idx; + iter->next_page_idx[cpu] = next_page_idx; + iter->next_page[cpu] = next_page; + + return ent; +} + +static struct trace_entry * +__find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc) { struct trace_array *tr = iter->tr; struct trace_entry *ent, *next = NULL; @@ -1115,7 +1168,23 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) for_each_tracing_cpu(cpu) { if (!head_page(tr->data[cpu])) continue; + ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); + + if (ent && ent->type == TRACE_CONT) { + struct trace_array_cpu *data = tr->data[cpu]; + + if (!inc) + ent = trace_entry_next(tr, data, iter, cpu); + else { + while (ent && ent->type == TRACE_CONT) { + trace_iterator_increment(iter, cpu); + ent = trace_entry_idx(tr, tr->data[cpu], + iter, cpu); + } + } + } + /* * Pick the entry with the smallest timestamp: */ @@ -1131,25 +1200,39 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) return next; } -static void trace_iterator_increment(struct trace_iterator *iter) +/* Find the next real entry, without updating the iterator itself */ +static struct trace_entry * +find_next_entry(struct trace_iterator *iter, int *ent_cpu) { - iter->idx++; - iter->next_idx[iter->cpu]++; - iter->next_page_idx[iter->cpu]++; + return __find_next_entry(iter, ent_cpu, 0); +} + +/* Find the next real entry, and increment the iterator to the next entry */ +static void *find_next_entry_inc(struct trace_iterator *iter) +{ + struct trace_entry *next; + int next_cpu = -1; - if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) { - struct trace_array_cpu *data = iter->tr->data[iter->cpu]; + next = __find_next_entry(iter, &next_cpu, 1); - iter->next_page_idx[iter->cpu] = 0; - iter->next_page[iter->cpu] = - trace_next_list(data, iter->next_page[iter->cpu]); - } + iter->prev_ent = iter->ent; + iter->prev_cpu = iter->cpu; + + iter->ent = next; + iter->cpu = next_cpu; + + if (next) + trace_iterator_increment(iter, iter->cpu); + + return next ? iter : NULL; } static void trace_consume(struct trace_iterator *iter) { struct trace_array_cpu *data = iter->tr->data[iter->cpu]; + struct trace_entry *ent; + again: data->trace_tail_idx++; if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { data->trace_tail = trace_next_page(data, data->trace_tail); @@ -1160,25 +1243,11 @@ static void trace_consume(struct trace_iterator *iter) if (data->trace_head == data->trace_tail && data->trace_head_idx == data->trace_tail_idx) data->trace_idx = 0; -} - -static void *find_next_entry_inc(struct trace_iterator *iter) -{ - struct trace_entry *next; - int next_cpu = -1; - - next = find_next_entry(iter, &next_cpu); - - iter->prev_ent = iter->ent; - iter->prev_cpu = iter->cpu; - - iter->ent = next; - iter->cpu = next_cpu; - - if (next) - trace_iterator_increment(iter); - return next ? iter : NULL; + ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu], + iter, iter->cpu); + if (ent && ent->type == TRACE_CONT) + goto again; } static void *s_next(struct seq_file *m, void *v, loff_t *pos) @@ -1473,6 +1542,26 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; +static void +trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) +{ + struct trace_array *tr = iter->tr; + struct trace_array_cpu *data = tr->data[iter->cpu]; + struct trace_entry *ent; + + ent = trace_entry_idx(tr, data, iter, iter->cpu); + if (!ent || ent->type != TRACE_CONT) { + trace_seq_putc(s, '\n'); + return; + } + + do { + trace_seq_printf(s, "%s", ent->cont.buf); + trace_iterator_increment(iter, iter->cpu); + ent = trace_entry_idx(tr, data, iter, iter->cpu); + } while (ent && ent->type == TRACE_CONT); +} + static int print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { @@ -1491,6 +1580,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) if (!next_entry) next_entry = entry; + + if (entry->type == TRACE_CONT) + return 1; + rel_usecs = ns2usecs(next_entry->field.t - entry->field.t); abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start); @@ -1550,6 +1643,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) } trace_seq_puts(s, "\n"); break; + case TRACE_PRINT: + seq_print_ip_sym(s, field->print.ip, sym_flags); + trace_seq_printf(s, ": %s", field->print.buf); + if (field->flags && TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1571,6 +1670,10 @@ static int print_trace_fmt(struct trace_iterator *iter) int i; entry = iter->ent; + + if (entry->type == TRACE_CONT) + return 1; + field = &entry->field; comm = trace_find_cmdline(iter->ent->field.pid); @@ -1653,6 +1756,12 @@ static int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_PRINT: + seq_print_ip_sym(s, field->print.ip, sym_flags); + trace_seq_printf(s, ": %s", field->print.buf); + if (field->flags && TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; } return 1; } @@ -1666,6 +1775,10 @@ static int print_raw_fmt(struct trace_iterator *iter) int S, T; entry = iter->ent; + + if (entry->type == TRACE_CONT) + return 1; + field = &entry->field; ret = trace_seq_printf(s, "%d %d %llu ", @@ -1708,6 +1821,12 @@ static int print_raw_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_PRINT: + trace_seq_printf(s, "# %lx %s", + field->print.ip, field->print.buf); + if (field->flags && TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; } return 1; } @@ -1733,6 +1852,10 @@ static int print_hex_fmt(struct trace_iterator *iter) int S, T; entry = iter->ent; + + if (entry->type == TRACE_CONT) + return 1; + field = &entry->field; SEQ_PUT_HEX_FIELD_RET(s, field->pid); @@ -1778,6 +1901,10 @@ static int print_bin_fmt(struct trace_iterator *iter) struct trace_field *field; entry = iter->ent; + + if (entry->type == TRACE_CONT) + return 1; + field = &entry->field; SEQ_PUT_FIELD_RET(s, field->pid); @@ -2943,6 +3070,94 @@ static __init void tracer_init_debugfs(void) #endif } +#define TRACE_BUF_SIZE 1024 +#define TRACE_PRINT_BUF_SIZE \ + (sizeof(struct trace_field) - offsetof(struct trace_field, print.buf)) +#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field) + +/** + * ftrace_printk - printf formatting in the ftrace buffer + * @fmt - the printf format for printing. + * + * Note: __ftrace_printk is an internal function for ftrace_printk and + * the @ip is passed in via the ftrace_printk macro. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please reframe from leaving ftrace_printks scattered around in + * your code. + */ +int __ftrace_printk(unsigned long ip, const char *fmt, ...) +{ + struct trace_array *tr = &global_trace; + static DEFINE_SPINLOCK(trace_buf_lock); + static char trace_buf[TRACE_BUF_SIZE]; + struct trace_array_cpu *data; + struct trace_entry *entry; + unsigned long flags; + long disabled; + va_list ap; + int cpu, len = 0, write, written = 0; + + if (likely(!ftrace_function_enabled)) + return 0; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (unlikely(disabled != 1 || !ftrace_function_enabled)) + goto out; + + spin_lock(&trace_buf_lock); + va_start(ap, fmt); + len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, ap); + va_end(ap); + + len = min(len, TRACE_BUF_SIZE-1); + trace_buf[len] = 0; + + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_PRINT; + entry->field.print.ip = ip; + + write = min(len, (int)(TRACE_PRINT_BUF_SIZE-1)); + + memcpy(&entry->field.print.buf, trace_buf, write); + entry->field.print.buf[write] = 0; + written = write; + + if (written != len) + entry->field.flags |= TRACE_FLAG_CONT; + + while (written != len) { + entry = tracing_get_trace_entry(tr, data); + + entry->type = TRACE_CONT; + write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1)); + memcpy(&entry->cont.buf, trace_buf+written, write); + entry->cont.buf[write] = 0; + written += write; + } + __raw_spin_unlock(&data->lock); + + spin_unlock(&trace_buf_lock); + + out: + atomic_dec(&data->disabled); + local_irq_restore(flags); + + return len; +} +EXPORT_SYMBOL_GPL(__ftrace_printk); + static int trace_alloc_page(void) { struct trace_array_cpu *data; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6ddd6a6556c..50b6d7a6f01 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -13,7 +13,9 @@ enum trace_type { TRACE_FN, TRACE_CTX, TRACE_WAKE, + TRACE_CONT, TRACE_STACK, + TRACE_PRINT, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -60,6 +62,14 @@ struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; }; +/* + * ftrace_printk entry: + */ +struct print_entry { + unsigned long ip; + char buf[]; +}; + /* * The trace field - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: @@ -77,6 +87,7 @@ struct trace_field { struct ctx_switch_entry ctx; struct special_entry special; struct stack_entry stack; + struct print_entry print; struct mmiotrace_rw mmiorw; struct mmiotrace_map mmiomap; }; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 0911b7e073b..630715bbd57 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -9,7 +9,9 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_FN: case TRACE_CTX: case TRACE_WAKE: + case TRACE_CONT: case TRACE_STACK: + case TRACE_PRINT: case TRACE_SPECIAL: return 1; } @@ -120,11 +122,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, struct trace_array *tr, int (*func)(void)) { - unsigned long count; - int ret; int save_ftrace_enabled = ftrace_enabled; int save_tracer_enabled = tracer_enabled; + unsigned long count; char *func_name; + int ret; /* The ftrace test PASSED */ printk(KERN_CONT "PASSED\n"); @@ -157,6 +159,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* enable tracing */ tr->ctrl = 1; trace->init(tr); + /* Sleep for a 1/10 of a second */ msleep(100); @@ -212,10 +215,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) { - unsigned long count; - int ret; int save_ftrace_enabled = ftrace_enabled; int save_tracer_enabled = tracer_enabled; + unsigned long count; + int ret; /* make sure msleep has been recorded */ msleep(1); -- cgit v1.2.3 From f09ce573f57ddc35c67b39e51f34545877b30031 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Sep 2008 10:24:14 +0200 Subject: ftrace: make ftrace_printk usable with the other tracers Currently ftrace_printk only works with the ftrace tracer, switch it to an iter_ctrl setting so we can make us of them with other tracers too. [rostedt@redhat.com: tweak to the disable condition] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 +++++--- kernel/trace/trace.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d24101cfc42..8a00d6c5c0f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -235,6 +235,7 @@ static const char *trace_options[] = { "block", "stacktrace", "sched-tree", + "ftrace_printk", NULL }; @@ -3091,9 +3092,10 @@ static __init void tracer_init_debugfs(void) int __ftrace_printk(unsigned long ip, const char *fmt, ...) { - struct trace_array *tr = &global_trace; static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; + + struct trace_array *tr = &global_trace; struct trace_array_cpu *data; struct trace_entry *entry; unsigned long flags; @@ -3101,7 +3103,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) va_list ap; int cpu, len = 0, write, written = 0; - if (likely(!ftrace_function_enabled)) + if (!(trace_flags & TRACE_ITER_PRINTK) || !tr->ctrl || tracing_disabled) return 0; local_irq_save(flags); @@ -3109,7 +3111,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (unlikely(disabled != 1 || !ftrace_function_enabled)) + if (unlikely(disabled != 1)) goto out; spin_lock(&trace_buf_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 50b6d7a6f01..5f54c875c8b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -356,6 +356,7 @@ enum trace_iterator_flags { TRACE_ITER_BLOCK = 0x80, TRACE_ITER_STACKTRACE = 0x100, TRACE_ITER_SCHED_TREE = 0x200, + TRACE_ITER_PRINTK = 0x400, }; #endif /* _LINUX_KERNEL_TRACE_H */ -- cgit v1.2.3 From 80b5e940050c273ba277acbf3a9fbc1d4441e681 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Sep 2008 10:24:16 +0200 Subject: ftrace: sched_switch: show the wakee's cpu While profiling the smp behaviour of the scheduler it was needed to know to which cpu a task got woken. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 12 +++++++++--- kernel/trace/trace.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8a00d6c5c0f..7e6cb4fe62f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -977,6 +977,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->field.ctx.next_pid = next->pid; entry->field.ctx.next_prio = next->prio; entry->field.ctx.next_state = next->state; + entry->field.ctx.next_cpu = task_cpu(next); __trace_stack(tr, data, flags, 5); __raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); @@ -1003,6 +1004,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->field.ctx.next_pid = wakee->pid; entry->field.ctx.next_prio = wakee->prio; entry->field.ctx.next_state = wakee->state; + entry->field.ctx.next_cpu = task_cpu(wakee); __trace_stack(tr, data, flags, 6); __raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); @@ -1636,10 +1638,11 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) __ffs(field->ctx.prev_state) + 1 : 0; S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; comm = trace_find_cmdline(field->ctx.next_pid); - trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", + trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", field->ctx.prev_pid, field->ctx.prev_prio, S, entry->type == TRACE_CTX ? "==>" : " +", + field->ctx.next_cpu, field->ctx.next_pid, field->ctx.next_prio, T, comm); @@ -1736,11 +1739,12 @@ static int print_trace_fmt(struct trace_iterator *iter) state_to_char[field->ctx.prev_state] : 'X'; T = field->ctx.next_state < sizeof(state_to_char) ? state_to_char[field->ctx.next_state] : 'X'; - ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", + ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n", field->ctx.prev_pid, field->ctx.prev_prio, S, entry->type == TRACE_CTX ? "==>" : " +", + field->ctx.next_cpu, field->ctx.next_pid, field->ctx.next_prio, T); @@ -1817,10 +1821,11 @@ static int print_raw_fmt(struct trace_iterator *iter) state_to_char[field->ctx.next_state] : 'X'; if (entry->type == TRACE_WAKE) S = '+'; - ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", + ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n", field->ctx.prev_pid, field->ctx.prev_prio, S, + field->ctx.next_cpu, field->ctx.next_pid, field->ctx.next_prio, T); @@ -1893,6 +1898,7 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid); SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio); SEQ_PUT_HEX_FIELD_RET(s, S); + SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_cpu); SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid); SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio); SEQ_PUT_HEX_FIELD_RET(s, T); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5f54c875c8b..77c265f6a77 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -41,6 +41,7 @@ struct ctx_switch_entry { unsigned int next_pid; unsigned char next_prio; unsigned char next_state; + unsigned int next_cpu; }; /* -- cgit v1.2.3 From 45dcd8b8a8ca855591e3ac882d3a7fc255d09d43 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 21:56:41 +0300 Subject: ftrace: move mmiotrace functions out of trace.c Moves the mmiotrace specific functions from trace.c to trace_mmiotrace.c. Functions trace_wake_up(), tracing_get_trace_entry(), and tracing_generic_entry_update() are therefore made available outside trace.c. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 46 ++---------------------------------------- kernel/trace/trace.h | 15 ++++++-------- kernel/trace/trace_mmiotrace.c | 42 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 53 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7e6cb4fe62f..d372bc53596 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -785,7 +785,7 @@ trace_next_page(struct trace_array_cpu *data, void *addr) return page_address(page); } -static inline struct trace_entry * +struct trace_entry * tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; @@ -821,7 +821,7 @@ tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) return entry; } -static inline void +void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) { struct task_struct *tsk = current; @@ -865,48 +865,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -#ifdef CONFIG_MMIOTRACE -void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, - struct mmiotrace_rw *rw) -{ - struct trace_entry *entry; - unsigned long irq_flags; - - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - - entry = tracing_get_trace_entry(tr, data); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_MMIO_RW; - entry->field.mmiorw = *rw; - - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); - - trace_wake_up(); -} - -void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, - struct mmiotrace_map *map) -{ - struct trace_entry *entry; - unsigned long irq_flags; - - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - - entry = tracing_get_trace_entry(tr, data); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_MMIO_MAP; - entry->field.mmiomap = *map; - - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); - - trace_wake_up(); -} -#endif - void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 77c265f6a77..9d39aa00a9c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -213,11 +213,17 @@ struct trace_iterator { long idx; }; +void trace_wake_up(void); void tracing_reset(struct trace_array_cpu *data); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); +struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, + struct trace_array_cpu *data); +void tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags); + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, @@ -291,15 +297,6 @@ extern unsigned long ftrace_update_tot_cnt; extern int DYN_FTRACE_TEST_NAME(void); #endif -#ifdef CONFIG_MMIOTRACE -extern void __trace_mmiotrace_rw(struct trace_array *tr, - struct trace_array_cpu *data, - struct mmiotrace_rw *rw); -extern void __trace_mmiotrace_map(struct trace_array *tr, - struct trace_array_cpu *data, - struct mmiotrace_map *map); -#endif - #ifdef CONFIG_FTRACE_STARTUP_TEST #ifdef CONFIG_FTRACE extern int trace_selftest_startup_function(struct tracer *trace, diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 9b7a936f4b1..ef02747b26d 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -276,6 +276,27 @@ __init static int init_mmio_trace(void) } device_initcall(init_mmio_trace); +static void __trace_mmiotrace_rw(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_rw *rw) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_RW; + entry->field.mmiorw = *rw; + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); + + trace_wake_up(); +} + void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; @@ -283,6 +304,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw) __trace_mmiotrace_rw(tr, data, rw); } +static void __trace_mmiotrace_map(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_map *map) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_MAP; + entry->field.mmiomap = *map; + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); + + trace_wake_up(); +} + void mmio_trace_mapping(struct mmiotrace_map *map) { struct trace_array *tr = mmio_trace_array; -- cgit v1.2.3 From 801fe40001dfc263848552fb28924b766ed44ea4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 21:58:24 +0300 Subject: ftrace: add trace_vprintk() trace_vprintk() for easier implementation of tracer specific *_printk functions. Add check check for no_tracer, and implement __ftrace_printk() as a wrapper. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 24 ++++++++++++++++++------ kernel/trace/trace.h | 1 + 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d372bc53596..406de9cf282 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3054,7 +3054,7 @@ static __init void tracer_init_debugfs(void) (sizeof(struct trace_field) - offsetof(struct trace_field, print.buf)) #define TRACE_CONT_BUF_SIZE sizeof(struct trace_field) -int __ftrace_printk(unsigned long ip, const char *fmt, ...) +int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; @@ -3064,10 +3064,9 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) struct trace_entry *entry; unsigned long flags; long disabled; - va_list ap; int cpu, len = 0, write, written = 0; - if (!(trace_flags & TRACE_ITER_PRINTK) || !tr->ctrl || tracing_disabled) + if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled) return 0; local_irq_save(flags); @@ -3079,9 +3078,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) goto out; spin_lock(&trace_buf_lock); - va_start(ap, fmt); - len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, ap); - va_end(ap); + len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); trace_buf[len] = 0; @@ -3120,6 +3117,21 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return len; } +EXPORT_SYMBOL_GPL(trace_vprintk); + +int __ftrace_printk(unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + va_start(ap, fmt); + ret = trace_vprintk(ip, fmt, ap); + va_end(ap); + return ret; +} EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9d39aa00a9c..be3b3cf95f4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -333,6 +333,7 @@ extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); extern long ns2usecs(cycle_t nsec); +extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; -- cgit v1.2.3 From fc5e27ae4b45a0619701a83f30d9b7fad7ed9400 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:02:27 +0300 Subject: mmiotrace: handle TRACE_PRINT entries Also make trace_seq_print_cont() non-static, and add a newline if the seq buffer can't hold all data. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 31 +++++++++++-------------------- kernel/trace/trace.h | 19 +++++++++++++++++++ kernel/trace/trace_mmiotrace.c | 23 +++++++++++++++++++++++ 3 files changed, 53 insertions(+), 20 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 406de9cf282..7e7154f7700 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -199,23 +199,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } -/* - * trace_flag_type is an enumeration that holds different - * states when a trace occurs. These are: - * IRQS_OFF - interrupts were disabled - * NEED_RESCED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler - * CONT - multiple entries hold the trace item - */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_NEED_RESCHED = 0x02, - TRACE_FLAG_HARDIRQ = 0x04, - TRACE_FLAG_SOFTIRQ = 0x08, - TRACE_FLAG_CONT = 0x10, -}; - /* * TRACE_ITER_SYM_MASK masks the options in trace_flags that * control the output of kernel symbols. @@ -1517,12 +1500,16 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; -static void -trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) +/* + * The message is supposed to contain an ending newline. + * If the printing stops prematurely, try to add a newline of our own. + */ +void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) { struct trace_array *tr = iter->tr; struct trace_array_cpu *data = tr->data[iter->cpu]; struct trace_entry *ent; + bool ok = true; ent = trace_entry_idx(tr, data, iter, iter->cpu); if (!ent || ent->type != TRACE_CONT) { @@ -1531,10 +1518,14 @@ trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) } do { - trace_seq_printf(s, "%s", ent->cont.buf); + if (ok) + ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0); __trace_iterator_increment(iter, iter->cpu); ent = trace_entry_idx(tr, data, iter, iter->cpu); } while (ent && ent->type == TRACE_CONT); + + if (!ok) + trace_seq_putc(s, '\n'); } static int diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index be3b3cf95f4..648433d18cc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -71,6 +71,23 @@ struct print_entry { char buf[]; }; +/* + * trace_flag_type is an enumeration that holds different + * states when a trace occurs. These are: + * IRQS_OFF - interrupts were disabled + * NEED_RESCED - reschedule is requested + * HARDIRQ - inside an interrupt handler + * SOFTIRQ - inside a softirq handler + * CONT - multiple entries hold the trace item + */ +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_NEED_RESCHED = 0x02, + TRACE_FLAG_HARDIRQ = 0x04, + TRACE_FLAG_SOFTIRQ = 0x08, + TRACE_FLAG_CONT = 0x10, +}; + /* * The trace field - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: @@ -330,6 +347,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +extern void trace_seq_print_cont(struct trace_seq *s, + struct trace_iterator *iter); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); extern long ns2usecs(cycle_t nsec); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 767d1faf56e..a108c326f36 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -245,6 +245,27 @@ static int mmio_print_map(struct trace_iterator *iter) return 0; } +static int mmio_print_mark(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + const char *msg = entry->field.print.buf; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->field.t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret; + + /* The trailing newline must be in the message. */ + ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg); + if (!ret) + return 0; + + if (entry->field.flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + + return 1; +} + /* return 0 to abort printing without consuming current entry in pipe mode */ static int mmio_print_line(struct trace_iterator *iter) { @@ -253,6 +274,8 @@ static int mmio_print_line(struct trace_iterator *iter) return mmio_print_rw(iter); case TRACE_MMIO_MAP: return mmio_print_map(iter); + case TRACE_PRINT: + return mmio_print_mark(iter); default: return 1; /* ignore unknown entries */ } -- cgit v1.2.3 From 5bf9a1ee350a10feb94107de32a203d81fbbe706 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:06:42 +0300 Subject: ftrace: inject markers via trace_marker file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow a user to inject a marker (TRACE_PRINT entry) into the trace ring buffer. The related file operations are derived from code by Frédéric Weisbecker . Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/tracers/mmiotrace.txt | 5 +-- kernel/trace/trace.c | 76 ++++++++++++++++++++++++++++++++++--- kernel/trace/trace.h | 4 ++ 3 files changed, 77 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt index a4afb560a45..5bbbe209622 100644 --- a/Documentation/tracers/mmiotrace.txt +++ b/Documentation/tracers/mmiotrace.txt @@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug $ echo mmiotrace > /debug/tracing/current_tracer $ cat /debug/tracing/trace_pipe > mydump.txt & Start X or whatever. -$ echo "X is up" > /debug/tracing/marker +$ echo "X is up" > /debug/tracing/trace_marker $ echo none > /debug/tracing/current_tracer Check for lost events. @@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background. Load the driver you want to trace and use it. Mmiotrace will only catch MMIO accesses to areas that are ioremapped while mmiotrace is active. -[Unimplemented feature:] During tracing you can place comments (markers) into the trace by -$ echo "X is up" > /debug/tracing/marker +$ echo "X is up" > /debug/tracing/trace_marker This makes it easier to see which part of the (huge) trace corresponds to which action. It is recommended to place descriptive markers about what you do. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7e7154f7700..eee1fd96489 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2879,6 +2879,66 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, return cnt; } +static int tracing_open_mark(struct inode *inode, struct file *filp) +{ + int ret; + + ret = tracing_open_generic(inode, filp); + if (ret) + return ret; + + if (current_trace == &no_tracer) + return -ENODEV; + + return 0; +} + +static int mark_printk(const char *fmt, ...) +{ + int ret; + va_list args; + va_start(args, fmt); + ret = trace_vprintk(0, fmt, args); + va_end(args); + return ret; +} + +static ssize_t +tracing_mark_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *fpos) +{ + char *buf; + char *end; + struct trace_array *tr = &global_trace; + + if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled) + return -EINVAL; + + if (cnt > TRACE_BUF_SIZE) + cnt = TRACE_BUF_SIZE; + + buf = kmalloc(cnt + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, ubuf, cnt)) { + kfree(buf); + return -EFAULT; + } + + /* Cut from the first nil or newline. */ + buf[cnt] = '\0'; + end = strchr(buf, '\n'); + if (end) + *end = '\0'; + + cnt = mark_printk("%s\n", buf); + kfree(buf); + *fpos += cnt; + + return cnt; +} + static struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -2910,6 +2970,11 @@ static struct file_operations tracing_entries_fops = { .write = tracing_entries_write, }; +static struct file_operations tracing_mark_fops = { + .open = tracing_open_mark, + .write = tracing_mark_write, +}; + #ifdef CONFIG_DYNAMIC_FTRACE static ssize_t @@ -3027,6 +3092,12 @@ static __init void tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_entries' entry\n"); + entry = debugfs_create_file("trace_marker", 0220, d_tracer, + NULL, &tracing_mark_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'trace_marker' entry\n"); + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, @@ -3040,11 +3111,6 @@ static __init void tracer_init_debugfs(void) #endif } -#define TRACE_BUF_SIZE 1024 -#define TRACE_PRINT_BUF_SIZE \ - (sizeof(struct trace_field) - offsetof(struct trace_field, print.buf)) -#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field) - int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { static DEFINE_SPINLOCK(trace_buf_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 648433d18cc..42f65d0097f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -124,6 +124,10 @@ struct trace_entry { }; #define TRACE_ENTRY_SIZE sizeof(struct trace_entry) +#define TRACE_BUF_SIZE 1024 +#define TRACE_PRINT_BUF_SIZE \ + (sizeof(struct trace_field) - offsetof(struct trace_field, print.buf)) +#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field) /* * The CPU trace array - it consists of thousands of trace entries -- cgit v1.2.3 From fb1b6d8b5154c692172a424e45fbd0573295cb93 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Fri, 19 Sep 2008 03:06:43 -0700 Subject: ftrace: add nop tracer A no-op tracer which can serve two purposes: 1. A template for development of a new tracer. 2. A convenient way to see ftrace_printk() calls without an irrelevant trace making the output messy. [ mingo@elte.hu: resolved conflicts ] Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 10 +++++++ kernel/trace/Makefile | 1 + kernel/trace/trace.h | 4 +++ kernel/trace/trace_nop.c | 65 +++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_selftest.c | 9 ++++++ 5 files changed, 89 insertions(+) create mode 100644 kernel/trace/trace_nop.c (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 16e5bb5daaa..d7b2de74463 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -101,6 +101,16 @@ config SCHED_TRACER This tracer tracks the latency of the highest priority task to be scheduled in, starting from the point it has woken up. +config NOP_TRACER + bool "NOP Tracer" + depends on HAVE_FTRACE + depends on DEBUG_KERNEL + select TRACING + help + This tracer does nothing. The primary purpose for it is to + politely print the output of ftrace_printk() calls without + the overhead of an irrelevant trace taking place. + config CONTEXT_SWITCH_TRACER bool "Trace process context switches" depends on HAVE_FTRACE diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 58ec61c44bd..73ba13f5a46 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o +obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 42f65d0097f..447d4b9b639 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -339,6 +339,10 @@ extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); #endif +#ifdef CONFIG_NOP_TRACER +extern int trace_selftest_startup_nop(struct tracer *trace, + struct trace_array *tr); +#endif #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c new file mode 100644 index 00000000000..dafaefb8403 --- /dev/null +++ b/kernel/trace/trace_nop.c @@ -0,0 +1,65 @@ +/* + * nop tracer + * + * Copyright (C) 2008 Steven Noonan + * + */ + +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *ctx_trace; + +static void start_nop_trace(struct trace_array *tr) +{ + /* Nothing to do! */ +} + +static void stop_nop_trace(struct trace_array *tr) +{ + /* Nothing to do! */ +} + +static void nop_trace_init(struct trace_array *tr) +{ + ctx_trace = tr; + + if (tr->ctrl) + start_nop_trace(tr); +} + +static void nop_trace_reset(struct trace_array *tr) +{ + if (tr->ctrl) + stop_nop_trace(tr); +} + +static void nop_trace_ctrl_update(struct trace_array *tr) +{ + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_nop_trace(tr); + else + stop_nop_trace(tr); +} + +static struct tracer nop_trace __read_mostly = +{ + .name = "nop", + .init = nop_trace_init, + .reset = nop_trace_reset, + .ctrl_update = nop_trace_ctrl_update, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_nop, +#endif +}; + +__init static int init_nop_trace(void) +{ + return register_tracer(&nop_trace); +} +device_initcall(init_nop_trace); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 630715bbd57..82db9103b9b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -418,6 +418,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } #endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ +#ifdef CONFIG_NOP_TRACER +int +trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) +{ + /* What could possibly go wrong? */ + return 0; +} +#endif + #ifdef CONFIG_SCHED_TRACER static int trace_wakeup_test_thread(void *data) { -- cgit v1.2.3 From 8925b394eca0bb0a444a6487d702d0f650e94a10 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Sat, 20 Sep 2008 01:00:38 -0700 Subject: trace: remove pointless ifdefs The functions are already 'extern' anyway, so there's no problem with linkage. Removing these ifdefs also helps find any potential compiler errors. Suggested by Andrew Morton. Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 447d4b9b639..c8c687088b4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -319,38 +319,22 @@ extern int DYN_FTRACE_TEST_NAME(void); #endif #ifdef CONFIG_FTRACE_STARTUP_TEST -#ifdef CONFIG_FTRACE extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); -#endif -#ifdef CONFIG_IRQSOFF_TRACER extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); -#endif -#ifdef CONFIG_PREEMPT_TRACER extern int trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr); -#endif -#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER) extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr); -#endif -#ifdef CONFIG_SCHED_TRACER extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); -#endif -#ifdef CONFIG_NOP_TRACER extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); -#endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); -#endif -#ifdef CONFIG_SYSPROF_TRACER extern int trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr); -#endif #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -- cgit v1.2.3 From 43a15386c4faf913f7d70a47748c266d6210cd6e Mon Sep 17 00:00:00 2001 From: Frédéric Weisbecker Date: Sun, 21 Sep 2008 20:16:30 +0200 Subject: tracing/ftrace: replace none tracer by nop tracer Replace "none" tracer by the recently created "nop" tracer. Both are pretty similar except that nop accepts TRACE_PRINT or TRACE_SPECIAL entries. And as a consequence, changing the size of the ring buffer now requires that tracing has already been disabled. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Steven Noonan Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 49 +++++++++--------------------------------------- kernel/trace/trace.h | 2 ++ kernel/trace/trace_nop.c | 7 +------ 3 files changed, 12 insertions(+), 46 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eee1fd96489..f2ef72fa3f1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -142,24 +142,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds iter_ctrl options */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; -static notrace void no_trace_init(struct trace_array *tr) -{ - int cpu; - - ftrace_function_enabled = 0; - if(tr->ctrl) - for_each_online_cpu(cpu) - tracing_reset(tr->data[cpu]); - tracer_enabled = 0; -} - -/* dummy trace to disable tracing */ -static struct tracer no_tracer __read_mostly = { - .name = "none", - .init = no_trace_init -}; - - /** * trace_wake_up - wake up tasks waiting for trace input * @@ -962,7 +944,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) long disabled; int cpu; - if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) + if (tracing_disabled || !tr->ctrl) return; local_irq_save(flags); @@ -2795,6 +2777,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, unsigned long val; char buf[64]; int i, ret; + struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2814,9 +2797,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); - if (current_trace != &no_tracer) { + if (tr->ctrl) { cnt = -EBUSY; - pr_info("ftrace: set current_tracer to none" + pr_info("ftrace: please disable tracing" " before modifying buffer size\n"); goto out; } @@ -2879,20 +2862,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, return cnt; } -static int tracing_open_mark(struct inode *inode, struct file *filp) -{ - int ret; - - ret = tracing_open_generic(inode, filp); - if (ret) - return ret; - - if (current_trace == &no_tracer) - return -ENODEV; - - return 0; -} - static int mark_printk(const char *fmt, ...) { int ret; @@ -2911,7 +2880,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, char *end; struct trace_array *tr = &global_trace; - if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled) + if (!tr->ctrl || tracing_disabled) return -EINVAL; if (cnt > TRACE_BUF_SIZE) @@ -2971,7 +2940,7 @@ static struct file_operations tracing_entries_fops = { }; static struct file_operations tracing_mark_fops = { - .open = tracing_open_mark, + .open = tracing_open_generic, .write = tracing_mark_write, }; @@ -3123,7 +3092,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) long disabled; int cpu, len = 0, write, written = 0; - if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled) + if (!tr->ctrl || tracing_disabled) return 0; local_irq_save(flags); @@ -3539,8 +3508,8 @@ __init static int tracer_alloc_buffers(void) trace_init_cmdlines(); - register_tracer(&no_tracer); - current_trace = &no_tracer; + register_tracer(&nop_trace); + current_trace = &nop_trace; /* All seems OK, enable tracing */ global_trace.ctrl = tracer_enabled; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c8c687088b4..cb2c3fb7dd5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -369,4 +369,6 @@ enum trace_iterator_flags { TRACE_ITER_PRINTK = 0x400, }; +extern struct tracer nop_trace; + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 9fb02c17ad0..16c9ba060ba 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -51,7 +51,7 @@ static void nop_trace_ctrl_update(struct trace_array *tr) stop_nop_trace(tr); } -static struct tracer nop_trace __read_mostly = +struct tracer nop_trace __read_mostly = { .name = "nop", .init = nop_trace_init, @@ -62,8 +62,3 @@ static struct tracer nop_trace __read_mostly = #endif }; -__init static int init_nop_trace(void) -{ - return register_tracer(&nop_trace); -} -device_initcall(init_nop_trace); -- cgit v1.2.3 From d13744cd6e3fef373a3fe656ac349b4e7c49ff79 Mon Sep 17 00:00:00 2001 From: Frédéric Weisbecker Date: Tue, 23 Sep 2008 11:32:08 +0100 Subject: tracing/ftrace: add the boot tracer Add the boot/initcall tracer. It's primary purpose is to be able to trace the initcalls. It is intended to be used with scripts/bootgraph.pl after some small improvements. Note that it is not active after its init. To avoid tracing (and so crashing) before the whole tracing engine init, you have to explicitly call start_boot_trace() after do_pre_smp_initcalls() to enable it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 19 +++++++++ kernel/trace/trace.h | 4 ++ kernel/trace/trace_boot.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 kernel/trace/trace_boot.c (limited to 'kernel/trace/trace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5de9903645d..91954eb6460 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,8 @@ #include #include +#include +#include extern int ftrace_enabled; extern int @@ -209,4 +211,21 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +struct boot_trace { + pid_t caller; + initcall_t func; + int result; + unsigned long long duration; +}; + +#ifdef CONFIG_BOOT_TRACER +extern void trace_boot(struct boot_trace *it); +extern void start_boot_trace(void); +#else +static inline void trace_boot(struct boot_trace *it) { } +static inline void start_boot_trace(void) { } +#endif + + + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb2c3fb7dd5..b28bf8812ef 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -6,6 +6,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -19,6 +20,7 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, + TRACE_BOOT, __TRACE_LAST_TYPE }; @@ -30,6 +32,7 @@ struct ftrace_entry { unsigned long ip; unsigned long parent_ip; }; +extern struct tracer boot_tracer; /* * Context switch trace entry - which task (and prio) we switched from/to: @@ -108,6 +111,7 @@ struct trace_field { struct print_entry print; struct mmiotrace_rw mmiorw; struct mmiotrace_map mmiomap; + struct boot_trace initcall; }; }; diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c new file mode 100644 index 00000000000..c65ef8ffd6b --- /dev/null +++ b/kernel/trace/trace_boot.c @@ -0,0 +1,101 @@ +/* + * ring buffer based initcalls tracer + * + * Copyright (C) 2008 Frederic Weisbecker + * + */ + +#include +#include +#include + +#include "trace.h" + +static struct trace_array *boot_trace; +static int trace_boot_enabled; + + +/* Should be started after do_pre_smp_initcalls() in init/main.c */ +void start_boot_trace(void) +{ + trace_boot_enabled = 1; +} + +void stop_boot_trace(struct trace_array *tr) +{ + trace_boot_enabled = 0; +} + +static void boot_trace_init(struct trace_array *tr) +{ + int cpu; + boot_trace = tr; + + trace_boot_enabled = 0; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr->data[cpu]); +} + +static void boot_trace_ctrl_update(struct trace_array *tr) +{ + if (tr->ctrl) + start_boot_trace(); + else + stop_boot_trace(tr); +} + +static int initcall_print_line(struct trace_iterator *iter) +{ + int ret = 1; + struct trace_entry *entry = iter->ent; + struct boot_trace *it = &entry->field.initcall; + struct trace_seq *s = &iter->seq; + + if (iter->ent->type == TRACE_BOOT) + ret = trace_seq_printf(s, "%pF called from %i " + "returned %d after %lld msecs\n", + it->func, it->caller, it->result, + it->duration); + if (ret) + return 1; + return 0; +} + +struct tracer boot_tracer __read_mostly = +{ + .name = "initcall", + .init = boot_trace_init, + .reset = stop_boot_trace, + .ctrl_update = boot_trace_ctrl_update, + .print_line = initcall_print_line, +}; + + +void trace_boot(struct boot_trace *it) +{ + struct trace_entry *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = boot_trace; + + if (!trace_boot_enabled) + return; + + preempt_disable(); + data = tr->data[smp_processor_id()]; + + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_BOOT; + entry->field.initcall = *it; + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); + trace_wake_up(); + + preempt_enable(); +} -- cgit v1.2.3 From 3928a8a2d98081d1bc3c0a84a2d70e29b90ecf1c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Sep 2008 23:02:41 -0400 Subject: ftrace: make work with new ring buffer This patch ports ftrace over to the new ring buffer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 932 ++++++++++---------------------------- kernel/trace/trace.h | 22 +- kernel/trace/trace_boot.c | 16 +- kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_irqsoff.c | 6 +- kernel/trace/trace_mmiotrace.c | 40 +- kernel/trace/trace_nop.c | 2 +- kernel/trace/trace_sched_switch.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 60 +-- kernel/trace/trace_sysprof.c | 2 +- 11 files changed, 288 insertions(+), 798 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6ada059832a..ef80793858b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -33,25 +33,22 @@ #include #include +#include #include "trace.h" +#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) + unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; -static unsigned long __read_mostly tracing_nr_buffers; static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -static int trace_alloc_page(void); -static int trace_free_page(void); - static int tracing_disabled = 1; -static unsigned long tracing_pages_allocated; - long ns2usecs(cycle_t nsec) { @@ -62,7 +59,9 @@ ns2usecs(cycle_t nsec) cycle_t ftrace_now(int cpu) { - return cpu_clock(cpu); + u64 ts = ring_buffer_time_stamp(cpu); + ring_buffer_normalize_time_stamp(cpu, &ts); + return ts; } /* @@ -102,18 +101,18 @@ static int tracer_enabled = 1; int ftrace_function_enabled; /* - * trace_nr_entries is the number of entries that is allocated - * for a buffer. Note, the number of entries is always rounded - * to ENTRIES_PER_PAGE. + * trace_buf_size is the size in bytes that is allocated + * for a buffer. Note, the number of bytes is always rounded + * to page size. * * This number is purposely set to a low number of 16384. * If the dump on oops happens, it will be much appreciated * to not have to wait for all that output. Anyway this can be * boot time and run time configurable. */ -#define TRACE_ENTRIES_DEFAULT 16384UL +#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ -static unsigned long trace_nr_entries = TRACE_ENTRIES_DEFAULT; +static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; /* trace_types holds a link list of available tracers. */ static struct tracer *trace_types __read_mostly; @@ -158,23 +157,21 @@ void trace_wake_up(void) wake_up(&trace_wait); } -#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) - -static int __init set_nr_entries(char *str) +static int __init set_buf_size(char *str) { - unsigned long nr_entries; + unsigned long buf_size; int ret; if (!str) return 0; - ret = strict_strtoul(str, 0, &nr_entries); + ret = strict_strtoul(str, 0, &buf_size); /* nr_entries can not be zero */ - if (ret < 0 || nr_entries == 0) + if (ret < 0 || buf_size == 0) return 0; - trace_nr_entries = nr_entries; + trace_buf_size = buf_size; return 1; } -__setup("trace_entries=", set_nr_entries); +__setup("trace_buf_size=", set_buf_size); unsigned long nsecs_to_usecs(unsigned long nsecs) { @@ -243,54 +240,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } -#define CHECK_COND(cond) \ - if (unlikely(cond)) { \ - tracing_disabled = 1; \ - WARN_ON(1); \ - return -1; \ - } - -/** - * check_pages - integrity check of trace buffers - * - * As a safty measure we check to make sure the data pages have not - * been corrupted. - */ -int check_pages(struct trace_array_cpu *data) -{ - struct page *page, *tmp; - - CHECK_COND(data->trace_pages.next->prev != &data->trace_pages); - CHECK_COND(data->trace_pages.prev->next != &data->trace_pages); - - list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - CHECK_COND(page->lru.next->prev != &page->lru); - CHECK_COND(page->lru.prev->next != &page->lru); - } - - return 0; -} - -/** - * head_page - page address of the first page in per_cpu buffer. - * - * head_page returns the page address of the first page in - * a per_cpu buffer. This also preforms various consistency - * checks to make sure the buffer has not been corrupted. - */ -void *head_page(struct trace_array_cpu *data) -{ - struct page *page; - - if (list_empty(&data->trace_pages)) - return NULL; - - page = list_entry(data->trace_pages.next, struct page, lru); - BUG_ON(&page->lru == &data->trace_pages); - - return page_address(page); -} - /** * trace_seq_printf - sequence printing of trace information * @s: trace sequence descriptor @@ -437,34 +386,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) trace_seq_reset(s); } -/* - * flip the trace buffers between two trace descriptors. - * This usually is the buffers between the global_trace and - * the max_tr to record a snapshot of a current trace. - * - * The ftrace_max_lock must be held. - */ -static void -flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) -{ - struct list_head flip_pages; - - INIT_LIST_HEAD(&flip_pages); - - memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx, - sizeof(struct trace_array_cpu) - - offsetof(struct trace_array_cpu, trace_head_idx)); - - check_pages(tr1); - check_pages(tr2); - list_splice_init(&tr1->trace_pages, &flip_pages); - list_splice_init(&tr2->trace_pages, &tr1->trace_pages); - list_splice_init(&flip_pages, &tr2->trace_pages); - BUG_ON(!list_empty(&flip_pages)); - check_pages(tr1); - check_pages(tr2); -} - /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer @@ -477,17 +398,15 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct trace_array_cpu *data; - int i; + struct ring_buffer *buf = tr->buffer; WARN_ON_ONCE(!irqs_disabled()); __raw_spin_lock(&ftrace_max_lock); - /* clear out all the previous traces */ - for_each_tracing_cpu(i) { - data = tr->data[i]; - flip_trace(max_tr.data[i], data); - tracing_reset(data); - } + + tr->buffer = max_tr.buffer; + max_tr.buffer = buf; + + ring_buffer_reset(tr->buffer); __update_max_tr(tr, tsk, cpu); __raw_spin_unlock(&ftrace_max_lock); @@ -504,16 +423,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct trace_array_cpu *data = tr->data[cpu]; - int i; + int ret; WARN_ON_ONCE(!irqs_disabled()); __raw_spin_lock(&ftrace_max_lock); - for_each_tracing_cpu(i) - tracing_reset(max_tr.data[i]); - flip_trace(max_tr.data[cpu], data); - tracing_reset(data); + ring_buffer_reset(max_tr.buffer); + ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); + + WARN_ON_ONCE(ret); __update_max_tr(tr, tsk, cpu); __raw_spin_unlock(&ftrace_max_lock); @@ -550,7 +468,6 @@ int register_tracer(struct tracer *type) #ifdef CONFIG_FTRACE_STARTUP_TEST if (type->selftest) { struct tracer *saved_tracer = current_trace; - struct trace_array_cpu *data; struct trace_array *tr = &global_trace; int saved_ctrl = tr->ctrl; int i; @@ -562,10 +479,7 @@ int register_tracer(struct tracer *type) * If we fail, we do not register this tracer. */ for_each_tracing_cpu(i) { - data = tr->data[i]; - if (!head_page(data)) - continue; - tracing_reset(data); + tracing_reset(tr, i); } current_trace = type; tr->ctrl = 0; @@ -581,10 +495,7 @@ int register_tracer(struct tracer *type) } /* Only reset on passing, to avoid touching corrupted buffers */ for_each_tracing_cpu(i) { - data = tr->data[i]; - if (!head_page(data)) - continue; - tracing_reset(data); + tracing_reset(tr, i); } printk(KERN_CONT "PASSED\n"); } @@ -630,13 +541,9 @@ void unregister_tracer(struct tracer *type) mutex_unlock(&trace_types_lock); } -void tracing_reset(struct trace_array_cpu *data) +void tracing_reset(struct trace_array *tr, int cpu) { - data->trace_idx = 0; - data->overrun = 0; - data->trace_head = data->trace_tail = head_page(data); - data->trace_head_idx = 0; - data->trace_tail_idx = 0; + ring_buffer_reset_cpu(tr->buffer, cpu); } #define SAVED_CMDLINES 128 @@ -722,70 +629,6 @@ void tracing_record_cmdline(struct task_struct *tsk) trace_save_cmdline(tsk); } -static inline struct list_head * -trace_next_list(struct trace_array_cpu *data, struct list_head *next) -{ - /* - * Roundrobin - but skip the head (which is not a real page): - */ - next = next->next; - if (unlikely(next == &data->trace_pages)) - next = next->next; - BUG_ON(next == &data->trace_pages); - - return next; -} - -static inline void * -trace_next_page(struct trace_array_cpu *data, void *addr) -{ - struct list_head *next; - struct page *page; - - page = virt_to_page(addr); - - next = trace_next_list(data, &page->lru); - page = list_entry(next, struct page, lru); - - return page_address(page); -} - -struct trace_entry * -tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) -{ - unsigned long idx, idx_next; - struct trace_entry *entry; - - data->trace_idx++; - idx = data->trace_head_idx; - idx_next = idx + 1; - - BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE); - - entry = data->trace_head + idx * TRACE_ENTRY_SIZE; - - if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { - data->trace_head = trace_next_page(data, data->trace_head); - idx_next = 0; - } - - if (data->trace_head == data->trace_tail && - idx_next == data->trace_tail_idx) { - /* overrun */ - data->overrun++; - data->trace_tail_idx++; - if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { - data->trace_tail = - trace_next_page(data, data->trace_tail); - data->trace_tail_idx = 0; - } - } - - data->trace_head_idx = idx_next; - - return entry; -} - void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) { @@ -796,7 +639,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) entry->field.preempt_count = pc & 0xff; entry->field.pid = (tsk) ? tsk->pid : 0; - entry->field.t = ftrace_now(raw_smp_processor_id()); entry->field.flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | @@ -808,18 +650,20 @@ void trace_function(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { + struct ring_buffer_event *event; struct trace_entry *entry; unsigned long irq_flags; - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, flags); entry->type = TRACE_FN; entry->field.fn.ip = ip; entry->field.fn.parent_ip = parent_ip; - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } void @@ -835,13 +679,19 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip) { + struct ring_buffer_event *event; struct trace_entry *entry; struct stack_trace trace; + unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_STACKTRACE)) return; - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, flags); entry->type = TRACE_STACK; @@ -853,28 +703,31 @@ void __trace_stack(struct trace_array *tr, trace.entries = entry->field.stack.caller; save_stack_trace(&trace); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } void __trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { + struct ring_buffer_event *event; struct trace_array_cpu *data = __data; struct trace_array *tr = __tr; struct trace_entry *entry; unsigned long irq_flags; - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, 0); entry->type = TRACE_SPECIAL; entry->field.special.arg1 = arg1; entry->field.special.arg2 = arg2; entry->field.special.arg3 = arg3; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); __trace_stack(tr, data, irq_flags, 4); - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -886,12 +739,15 @@ tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags) { + struct ring_buffer_event *event; struct trace_entry *entry; unsigned long irq_flags; - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; entry->field.ctx.prev_pid = prev->pid; @@ -901,9 +757,8 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->field.ctx.next_prio = next->prio; entry->field.ctx.next_state = next->state; entry->field.ctx.next_cpu = task_cpu(next); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); __trace_stack(tr, data, flags, 5); - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); } void @@ -913,12 +768,15 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *curr, unsigned long flags) { + struct ring_buffer_event *event; struct trace_entry *entry; unsigned long irq_flags; - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, flags); entry->type = TRACE_WAKE; entry->field.ctx.prev_pid = curr->pid; @@ -928,9 +786,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->field.ctx.next_prio = wakee->prio; entry->field.ctx.next_state = wakee->state; entry->field.ctx.next_cpu = task_cpu(wakee); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); __trace_stack(tr, data, flags, 6); - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -1011,183 +868,77 @@ enum trace_file_type { TRACE_FILE_LAT_FMT = 1, }; -/* Return the current entry. */ -static struct trace_entry * -trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, - struct trace_iterator *iter, int cpu) -{ - struct page *page; - struct trace_entry *array; - - if (iter->next_idx[cpu] >= tr->entries || - iter->next_idx[cpu] >= data->trace_idx || - (data->trace_head == data->trace_tail && - data->trace_head_idx == data->trace_tail_idx)) - return NULL; - - if (!iter->next_page[cpu]) { - /* Initialize the iterator for this cpu trace buffer */ - WARN_ON(!data->trace_tail); - page = virt_to_page(data->trace_tail); - iter->next_page[cpu] = &page->lru; - iter->next_page_idx[cpu] = data->trace_tail_idx; - } - - page = list_entry(iter->next_page[cpu], struct page, lru); - BUG_ON(&data->trace_pages == &page->lru); - - array = page_address(page); - - WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); - return &array[iter->next_page_idx[cpu]]; -} - -/* Increment the index counter of an iterator by one */ -static void __trace_iterator_increment(struct trace_iterator *iter, int cpu) -{ - iter->next_idx[cpu]++; - iter->next_page_idx[cpu]++; - - if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) { - struct trace_array_cpu *data = iter->tr->data[cpu]; - - iter->next_page_idx[cpu] = 0; - iter->next_page[cpu] = - trace_next_list(data, iter->next_page[cpu]); - } -} - static void trace_iterator_increment(struct trace_iterator *iter, int cpu) { iter->idx++; - __trace_iterator_increment(iter, cpu); + ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); } static struct trace_entry * -trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data, - struct trace_iterator *iter, int cpu) +peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) { - struct list_head *next_page; - struct trace_entry *ent; - int idx, next_idx, next_page_idx; - - ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); - - if (likely(!ent || ent->type != TRACE_CONT)) - return ent; - - /* save the iterator details */ - idx = iter->idx; - next_idx = iter->next_idx[cpu]; - next_page_idx = iter->next_page_idx[cpu]; - next_page = iter->next_page[cpu]; - - /* find a real entry */ - do { - __trace_iterator_increment(iter, cpu); - ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); - } while (ent && ent->type != TRACE_CONT); - - /* reset the iterator */ - iter->idx = idx; - iter->next_idx[cpu] = next_idx; - iter->next_page_idx[cpu] = next_page_idx; - iter->next_page[cpu] = next_page; + struct ring_buffer_event *event; + struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; - return ent; + event = ring_buffer_iter_peek(buf_iter, ts); + return event ? ring_buffer_event_data(event) : NULL; } - static struct trace_entry * -__find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc) +__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { - struct trace_array *tr = iter->tr; + struct ring_buffer *buffer = iter->tr->buffer; struct trace_entry *ent, *next = NULL; + u64 next_ts = 0, ts; int next_cpu = -1; int cpu; for_each_tracing_cpu(cpu) { - if (!head_page(tr->data[cpu])) - continue; - ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); + if (ring_buffer_empty_cpu(buffer, cpu)) + continue; - if (ent && ent->type == TRACE_CONT) { - struct trace_array_cpu *data = tr->data[cpu]; - - if (!inc) - ent = trace_entry_next(tr, data, iter, cpu); - else { - while (ent && ent->type == TRACE_CONT) { - __trace_iterator_increment(iter, cpu); - ent = trace_entry_idx(tr, tr->data[cpu], - iter, cpu); - } - } - } + ent = peek_next_entry(iter, cpu, &ts); /* * Pick the entry with the smallest timestamp: */ - if (ent && (!next || ent->field.t < next->field.t)) { + if (ent && (!next || ts < next_ts)) { next = ent; next_cpu = cpu; + next_ts = ts; } } if (ent_cpu) *ent_cpu = next_cpu; + if (ent_ts) + *ent_ts = next_ts; + return next; } /* Find the next real entry, without updating the iterator itself */ static struct trace_entry * -find_next_entry(struct trace_iterator *iter, int *ent_cpu) +find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { - return __find_next_entry(iter, ent_cpu, 0); + return __find_next_entry(iter, ent_cpu, ent_ts); } /* Find the next real entry, and increment the iterator to the next entry */ static void *find_next_entry_inc(struct trace_iterator *iter) { - struct trace_entry *next; - int next_cpu = -1; - - next = __find_next_entry(iter, &next_cpu, 1); - - iter->prev_ent = iter->ent; - iter->prev_cpu = iter->cpu; + iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); - iter->ent = next; - iter->cpu = next_cpu; - - if (next) + if (iter->ent) trace_iterator_increment(iter, iter->cpu); - return next ? iter : NULL; + return iter->ent ? iter : NULL; } static void trace_consume(struct trace_iterator *iter) { - struct trace_array_cpu *data = iter->tr->data[iter->cpu]; - struct trace_entry *ent; - - again: - data->trace_tail_idx++; - if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { - data->trace_tail = trace_next_page(data, data->trace_tail); - data->trace_tail_idx = 0; - } - - /* Check if we empty it, then reset the index */ - if (data->trace_head == data->trace_tail && - data->trace_head_idx == data->trace_tail_idx) - data->trace_idx = 0; - - ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu], - iter, iter->cpu); - if (ent && ent->type == TRACE_CONT) - goto again; + ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); } static void *s_next(struct seq_file *m, void *v, loff_t *pos) @@ -1220,7 +971,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) struct trace_iterator *iter = m->private; void *p = NULL; loff_t l = 0; - int i; + int cpu; mutex_lock(&trace_types_lock); @@ -1239,12 +990,9 @@ static void *s_start(struct seq_file *m, loff_t *pos) iter->ent = NULL; iter->cpu = 0; iter->idx = -1; - iter->prev_ent = NULL; - iter->prev_cpu = -1; - for_each_tracing_cpu(i) { - iter->next_idx[i] = 0; - iter->next_page[i] = NULL; + for_each_tracing_cpu(cpu) { + ring_buffer_iter_reset(iter->buffer_iter[cpu]); } for (p = iter; p && l < *pos; p = s_next(m, p, &l)) @@ -1365,23 +1113,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) struct trace_array *tr = iter->tr; struct trace_array_cpu *data = tr->data[tr->cpu]; struct tracer *type = current_trace; - unsigned long total = 0; - unsigned long entries = 0; - int cpu; + unsigned long total; + unsigned long entries; const char *name = "preemption"; if (type) name = type->name; - for_each_tracing_cpu(cpu) { - if (head_page(tr->data[cpu])) { - total += tr->data[cpu]->trace_idx; - if (tr->data[cpu]->trace_idx > tr->entries) - entries += tr->entries; - else - entries += tr->data[cpu]->trace_idx; - } - } + entries = ring_buffer_entries(iter->tr->buffer); + total = entries + + ring_buffer_overruns(iter->tr->buffer); seq_printf(m, "%s latency trace v1.1.5 on %s\n", name, UTS_RELEASE); @@ -1468,7 +1209,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) unsigned long preempt_mark_thresh = 100; static void -lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, +lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, unsigned long rel_usecs) { trace_seq_printf(s, " %4lldus", abs_usecs); @@ -1488,12 +1229,10 @@ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; */ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) { - struct trace_array *tr = iter->tr; - struct trace_array_cpu *data = tr->data[iter->cpu]; struct trace_entry *ent; bool ok = true; - ent = trace_entry_idx(tr, data, iter, iter->cpu); + ent = peek_next_entry(iter, iter->cpu, NULL); if (!ent || ent->type != TRACE_CONT) { trace_seq_putc(s, '\n'); return; @@ -1502,8 +1241,8 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) do { if (ok) ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0); - __trace_iterator_increment(iter, iter->cpu); - ent = trace_entry_idx(tr, data, iter, iter->cpu); + ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); + ent = peek_next_entry(iter, iter->cpu, NULL); } while (ent && ent->type == TRACE_CONT); if (!ok) @@ -1515,25 +1254,26 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_entry *next_entry = find_next_entry(iter, NULL); + struct trace_entry *next_entry; unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); struct trace_entry *entry = iter->ent; struct trace_field *field = &entry->field; unsigned long abs_usecs; unsigned long rel_usecs; + u64 next_ts; char *comm; int S, T; int i; unsigned state; - if (!next_entry) - next_entry = entry; - if (entry->type == TRACE_CONT) return 1; - rel_usecs = ns2usecs(next_entry->field.t - entry->field.t); - abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start); + next_entry = find_next_entry(iter, NULL, &next_ts); + if (!next_entry) + next_ts = iter->ts; + rel_usecs = ns2usecs(next_ts - iter->ts); + abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); if (verbose) { comm = trace_find_cmdline(field->pid); @@ -1542,7 +1282,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) comm, field->pid, cpu, field->flags, field->preempt_count, trace_idx, - ns2usecs(field->t), + ns2usecs(iter->ts), abs_usecs/1000, abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); @@ -1627,7 +1367,7 @@ static int print_trace_fmt(struct trace_iterator *iter) comm = trace_find_cmdline(iter->ent->field.pid); - t = ns2usecs(field->t); + t = ns2usecs(iter->ts); usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; @@ -1732,7 +1472,7 @@ static int print_raw_fmt(struct trace_iterator *iter) field = &entry->field; ret = trace_seq_printf(s, "%d %d %llu ", - field->pid, iter->cpu, field->t); + field->pid, iter->cpu, iter->ts); if (!ret) return 0; @@ -1811,7 +1551,7 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, field->pid); SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); - SEQ_PUT_HEX_FIELD_RET(s, field->t); + SEQ_PUT_HEX_FIELD_RET(s, iter->ts); switch (entry->type) { case TRACE_FN: @@ -1861,7 +1601,7 @@ static int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, field->pid); SEQ_PUT_FIELD_RET(s, field->cpu); - SEQ_PUT_FIELD_RET(s, field->t); + SEQ_PUT_FIELD_RET(s, iter->ts); switch (entry->type) { case TRACE_FN: @@ -1888,15 +1628,10 @@ static int print_bin_fmt(struct trace_iterator *iter) static int trace_empty(struct trace_iterator *iter) { - struct trace_array_cpu *data; int cpu; for_each_tracing_cpu(cpu) { - data = iter->tr->data[cpu]; - - if (head_page(data) && data->trace_idx && - (data->trace_tail != data->trace_head || - data->trace_tail_idx != data->trace_head_idx)) + if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) return 0; } return 1; @@ -1961,6 +1696,8 @@ static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, int *ret) { struct trace_iterator *iter; + struct seq_file *m; + int cpu; if (tracing_disabled) { *ret = -ENODEV; @@ -1981,28 +1718,43 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + for_each_tracing_cpu(cpu) { + iter->buffer_iter[cpu] = + ring_buffer_read_start(iter->tr->buffer, cpu); + if (!iter->buffer_iter[cpu]) + goto fail_buffer; + } + /* TODO stop tracer */ *ret = seq_open(file, &tracer_seq_ops); - if (!*ret) { - struct seq_file *m = file->private_data; - m->private = iter; + if (*ret) + goto fail_buffer; - /* stop the trace while dumping */ - if (iter->tr->ctrl) { - tracer_enabled = 0; - ftrace_function_enabled = 0; - } + m = file->private_data; + m->private = iter; - if (iter->trace && iter->trace->open) - iter->trace->open(iter); - } else { - kfree(iter); - iter = NULL; + /* stop the trace while dumping */ + if (iter->tr->ctrl) { + tracer_enabled = 0; + ftrace_function_enabled = 0; } + + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + mutex_unlock(&trace_types_lock); out: return iter; + + fail_buffer: + for_each_tracing_cpu(cpu) { + if (iter->buffer_iter[cpu]) + ring_buffer_read_finish(iter->buffer_iter[cpu]); + } + mutex_unlock(&trace_types_lock); + + return ERR_PTR(-ENOMEM); } int tracing_open_generic(struct inode *inode, struct file *filp) @@ -2018,8 +1770,14 @@ int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; struct trace_iterator *iter = m->private; + int cpu; mutex_lock(&trace_types_lock); + for_each_tracing_cpu(cpu) { + if (iter->buffer_iter[cpu]) + ring_buffer_read_finish(iter->buffer_iter[cpu]); + } + if (iter->trace && iter->trace->close) iter->trace->close(iter); @@ -2526,6 +2284,7 @@ static atomic_t tracing_reader; static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_iterator *iter; + int cpu; if (tracing_disabled) return -ENODEV; @@ -2546,17 +2305,38 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->trace = current_trace; filp->private_data = iter; + for_each_tracing_cpu(cpu) { + iter->buffer_iter[cpu] = + ring_buffer_read_start(iter->tr->buffer, cpu); + if (!iter->buffer_iter[cpu]) + goto fail_buffer; + } + if (iter->trace->pipe_open) iter->trace->pipe_open(iter); mutex_unlock(&trace_types_lock); return 0; + + fail_buffer: + for_each_tracing_cpu(cpu) { + if (iter->buffer_iter[cpu]) + ring_buffer_read_finish(iter->buffer_iter[cpu]); + } + mutex_unlock(&trace_types_lock); + + return -ENOMEM; } static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + int cpu; + for_each_tracing_cpu(cpu) { + if (iter->buffer_iter[cpu]) + ring_buffer_read_finish(iter->buffer_iter[cpu]); + } kfree(iter); atomic_dec(&tracing_reader); @@ -2592,13 +2372,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; - struct trace_array_cpu *data; - static cpumask_t mask; unsigned long flags; #ifdef CONFIG_FTRACE int ftrace_save; #endif - int cpu; ssize_t sret; /* return any leftover data */ @@ -2687,32 +2464,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, * and then release the locks again. */ - cpus_clear(mask); - local_irq_save(flags); + local_irq_disable(); #ifdef CONFIG_FTRACE ftrace_save = ftrace_enabled; ftrace_enabled = 0; #endif smp_wmb(); - for_each_tracing_cpu(cpu) { - data = iter->tr->data[cpu]; - - if (!head_page(data) || !data->trace_idx) - continue; - - atomic_inc(&data->disabled); - cpu_set(cpu, mask); - } - - for_each_cpu_mask(cpu, mask) { - data = iter->tr->data[cpu]; - __raw_spin_lock(&data->lock); - - if (data->overrun > iter->last_overrun[cpu]) - iter->overrun[cpu] += - data->overrun - iter->last_overrun[cpu]; - iter->last_overrun[cpu] = data->overrun; - } + ring_buffer_lock(iter->tr->buffer, &flags); while (find_next_entry_inc(iter) != NULL) { int ret; @@ -2731,19 +2489,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, break; } - for_each_cpu_mask(cpu, mask) { - data = iter->tr->data[cpu]; - __raw_spin_unlock(&data->lock); - } - - for_each_cpu_mask(cpu, mask) { - data = iter->tr->data[cpu]; - atomic_dec(&data->disabled); - } + ring_buffer_unlock(iter->tr->buffer, flags); #ifdef CONFIG_FTRACE ftrace_enabled = ftrace_save; #endif - local_irq_restore(flags); + local_irq_enable(); /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); @@ -2776,7 +2526,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; - int i, ret; + int ret; struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) @@ -2804,52 +2554,31 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, goto out; } - if (val > global_trace.entries) { - long pages_requested; - unsigned long freeable_pages; - - /* make sure we have enough memory before mapping */ - pages_requested = - (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE; - - /* account for each buffer (and max_tr) */ - pages_requested *= tracing_nr_buffers * 2; - - /* Check for overflow */ - if (pages_requested < 0) { - cnt = -ENOMEM; - goto out; - } - - freeable_pages = determine_dirtyable_memory(); - - /* we only allow to request 1/4 of useable memory */ - if (pages_requested > - ((freeable_pages + tracing_pages_allocated) / 4)) { - cnt = -ENOMEM; + if (val != global_trace.entries) { + ret = ring_buffer_resize(global_trace.buffer, val); + if (ret < 0) { + cnt = ret; goto out; } - while (global_trace.entries < val) { - if (trace_alloc_page()) { - cnt = -ENOMEM; - goto out; + ret = ring_buffer_resize(max_tr.buffer, val); + if (ret < 0) { + int r; + cnt = ret; + r = ring_buffer_resize(global_trace.buffer, + global_trace.entries); + if (r < 0) { + /* AARGH! We are left with different + * size max buffer!!!! */ + WARN_ON(1); + tracing_disabled = 1; } - /* double check that we don't go over the known pages */ - if (tracing_pages_allocated > pages_requested) - break; + goto out; } - } else { - /* include the number of entries in val (inc of page entries) */ - while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1)) - trace_free_page(); + global_trace.entries = val; } - /* check integrity */ - for_each_tracing_cpu(i) - check_pages(global_trace.data[i]); - filp->f_pos += cnt; /* If check pages failed, return ENOMEM */ @@ -3086,10 +2815,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; + struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; struct trace_entry *entry; - unsigned long flags; + unsigned long flags, irq_flags; long disabled; int cpu, len = 0, write, written = 0; @@ -3110,8 +2840,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) len = min(len, TRACE_BUF_SIZE-1); trace_buf[len] = 0; - __raw_spin_lock(&data->lock); - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, flags); entry->type = TRACE_PRINT; entry->field.print.ip = ip; @@ -3121,21 +2854,27 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) memcpy(&entry->field.print.buf, trace_buf, write); entry->field.print.buf[write] = 0; written = write; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); if (written != len) entry->field.flags |= TRACE_FLAG_CONT; while (written != len) { - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); entry->type = TRACE_CONT; write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1)); memcpy(&entry->cont.buf, trace_buf+written, write); entry->cont.buf[write] = 0; written += write; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } - __raw_spin_unlock(&data->lock); + out_unlock: spin_unlock(&trace_buf_lock); out: @@ -3227,12 +2966,10 @@ void ftrace_dump(void) static DEFINE_SPINLOCK(ftrace_dump_lock); /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; - struct trace_array_cpu *data; static cpumask_t mask; static int dump_ran; - unsigned long flags; + unsigned long flags, irq_flags; int cnt = 0; - int cpu; /* only one dump */ spin_lock_irqsave(&ftrace_dump_lock, flags); @@ -3258,25 +2995,7 @@ void ftrace_dump(void) cpus_clear(mask); - for_each_tracing_cpu(cpu) { - data = iter.tr->data[cpu]; - - if (!head_page(data) || !data->trace_idx) - continue; - - atomic_inc(&data->disabled); - cpu_set(cpu, mask); - } - - for_each_cpu_mask(cpu, mask) { - data = iter.tr->data[cpu]; - __raw_spin_lock(&data->lock); - - if (data->overrun > iter.last_overrun[cpu]) - iter.overrun[cpu] += - data->overrun - iter.last_overrun[cpu]; - iter.last_overrun[cpu] = data->overrun; - } + ring_buffer_lock(iter.tr->buffer, &irq_flags); while (!trace_empty(&iter)) { @@ -3305,205 +3024,47 @@ void ftrace_dump(void) else printk(KERN_TRACE "---------------------------------\n"); - for_each_cpu_mask(cpu, mask) { - data = iter.tr->data[cpu]; - __raw_spin_unlock(&data->lock); - } - - for_each_cpu_mask(cpu, mask) { - data = iter.tr->data[cpu]; - atomic_dec(&data->disabled); - } - + ring_buffer_unlock(iter.tr->buffer, irq_flags); out: spin_unlock_irqrestore(&ftrace_dump_lock, flags); } -static int trace_alloc_page(void) +__init static int tracer_alloc_buffers(void) { struct trace_array_cpu *data; - struct page *page, *tmp; - LIST_HEAD(pages); - void *array; - unsigned pages_allocated = 0; int i; - /* first allocate a page for each CPU */ - for_each_tracing_cpu(i) { - array = (void *)__get_free_page(GFP_KERNEL); - if (array == NULL) { - printk(KERN_ERR "tracer: failed to allocate page" - "for trace buffer!\n"); - goto free_pages; - } - - pages_allocated++; - page = virt_to_page(array); - list_add(&page->lru, &pages); + /* TODO: make the number of buffers hot pluggable with CPUS */ + tracing_buffer_mask = cpu_possible_map; -/* Only allocate if we are actually using the max trace */ -#ifdef CONFIG_TRACER_MAX_TRACE - array = (void *)__get_free_page(GFP_KERNEL); - if (array == NULL) { - printk(KERN_ERR "tracer: failed to allocate page" - "for trace buffer!\n"); - goto free_pages; - } - pages_allocated++; - page = virt_to_page(array); - list_add(&page->lru, &pages); -#endif + global_trace.buffer = ring_buffer_alloc(trace_buf_size, + TRACE_BUFFER_FLAGS); + if (!global_trace.buffer) { + printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); + WARN_ON(1); + return 0; } - - /* Now that we successfully allocate a page per CPU, add them */ - for_each_tracing_cpu(i) { - data = global_trace.data[i]; - page = list_entry(pages.next, struct page, lru); - list_del_init(&page->lru); - list_add_tail(&page->lru, &data->trace_pages); - ClearPageLRU(page); + global_trace.entries = ring_buffer_size(global_trace.buffer); #ifdef CONFIG_TRACER_MAX_TRACE - data = max_tr.data[i]; - page = list_entry(pages.next, struct page, lru); - list_del_init(&page->lru); - list_add_tail(&page->lru, &data->trace_pages); - SetPageLRU(page); -#endif - } - tracing_pages_allocated += pages_allocated; - global_trace.entries += ENTRIES_PER_PAGE; - - return 0; - - free_pages: - list_for_each_entry_safe(page, tmp, &pages, lru) { - list_del_init(&page->lru); - __free_page(page); + max_tr.buffer = ring_buffer_alloc(trace_buf_size, + TRACE_BUFFER_FLAGS); + if (!max_tr.buffer) { + printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); + WARN_ON(1); + ring_buffer_free(global_trace.buffer); + return 0; } - return -ENOMEM; -} - -static int trace_free_page(void) -{ - struct trace_array_cpu *data; - struct page *page; - struct list_head *p; - int i; - int ret = 0; - - /* free one page from each buffer */ - for_each_tracing_cpu(i) { - data = global_trace.data[i]; - p = data->trace_pages.next; - if (p == &data->trace_pages) { - /* should never happen */ - WARN_ON(1); - tracing_disabled = 1; - ret = -1; - break; - } - page = list_entry(p, struct page, lru); - ClearPageLRU(page); - list_del(&page->lru); - tracing_pages_allocated--; - tracing_pages_allocated--; - __free_page(page); - - tracing_reset(data); - -#ifdef CONFIG_TRACER_MAX_TRACE - data = max_tr.data[i]; - p = data->trace_pages.next; - if (p == &data->trace_pages) { - /* should never happen */ - WARN_ON(1); - tracing_disabled = 1; - ret = -1; - break; - } - page = list_entry(p, struct page, lru); - ClearPageLRU(page); - list_del(&page->lru); - __free_page(page); - - tracing_reset(data); + max_tr.entries = ring_buffer_size(max_tr.buffer); + WARN_ON(max_tr.entries != global_trace.entries); #endif - } - global_trace.entries -= ENTRIES_PER_PAGE; - - return ret; -} - -__init static int tracer_alloc_buffers(void) -{ - struct trace_array_cpu *data; - void *array; - struct page *page; - int pages = 0; - int ret = -ENOMEM; - int i; - - /* TODO: make the number of buffers hot pluggable with CPUS */ - tracing_nr_buffers = num_possible_cpus(); - tracing_buffer_mask = cpu_possible_map; /* Allocate the first page for all buffers */ for_each_tracing_cpu(i) { data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); max_tr.data[i] = &per_cpu(max_data, i); - - array = (void *)__get_free_page(GFP_KERNEL); - if (array == NULL) { - printk(KERN_ERR "tracer: failed to allocate page" - "for trace buffer!\n"); - goto free_buffers; - } - - /* set the array to the list */ - INIT_LIST_HEAD(&data->trace_pages); - page = virt_to_page(array); - list_add(&page->lru, &data->trace_pages); - /* use the LRU flag to differentiate the two buffers */ - ClearPageLRU(page); - - data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; - max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; - -/* Only allocate if we are actually using the max trace */ -#ifdef CONFIG_TRACER_MAX_TRACE - array = (void *)__get_free_page(GFP_KERNEL); - if (array == NULL) { - printk(KERN_ERR "tracer: failed to allocate page" - "for trace buffer!\n"); - goto free_buffers; - } - - INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); - page = virt_to_page(array); - list_add(&page->lru, &max_tr.data[i]->trace_pages); - SetPageLRU(page); -#endif - } - - /* - * Since we allocate by orders of pages, we may be able to - * round up a bit. - */ - global_trace.entries = ENTRIES_PER_PAGE; - pages++; - - while (global_trace.entries < trace_nr_entries) { - if (trace_alloc_page()) - break; - pages++; } - max_tr.entries = global_trace.entries; - - pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n", - pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE); - pr_info(" actual entries %ld\n", global_trace.entries); trace_init_cmdlines(); @@ -3519,38 +3080,13 @@ __init static int tracer_alloc_buffers(void) /* All seems OK, enable tracing */ global_trace.ctrl = tracer_enabled; tracing_disabled = 0; + atomic_notifier_chain_register(&panic_notifier_list, &trace_panic_notifier); register_die_notifier(&trace_die_notifier); return 0; - - free_buffers: - for (i-- ; i >= 0; i--) { - struct page *page, *tmp; - struct trace_array_cpu *data = global_trace.data[i]; - - if (data) { - list_for_each_entry_safe(page, tmp, - &data->trace_pages, lru) { - list_del_init(&page->lru); - __free_page(page); - } - } - -#ifdef CONFIG_TRACER_MAX_TRACE - data = max_tr.data[i]; - if (data) { - list_for_each_entry_safe(page, tmp, - &data->trace_pages, lru) { - list_del_init(&page->lru); - __free_page(page); - } - } -#endif - } - return ret; } early_initcall(tracer_alloc_buffers); fs_initcall(tracer_init_debugfs); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b28bf8812ef..f6965f775b4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -102,7 +103,6 @@ struct trace_field { char flags; char preempt_count; int pid; - cycle_t t; union { struct ftrace_entry fn; struct ctx_switch_entry ctx; @@ -139,16 +139,9 @@ struct trace_entry { * the trace, etc.) */ struct trace_array_cpu { - struct list_head trace_pages; atomic_t disabled; - raw_spinlock_t lock; - struct lock_class_key lock_key; /* these fields get copied into max-trace: */ - unsigned trace_head_idx; - unsigned trace_tail_idx; - void *trace_head; /* producer */ - void *trace_tail; /* consumer */ unsigned long trace_idx; unsigned long overrun; unsigned long saved_latency; @@ -172,6 +165,7 @@ struct trace_iterator; * They have on/off state as well: */ struct trace_array { + struct ring_buffer *buffer; unsigned long entries; long ctrl; int cpu; @@ -219,27 +213,21 @@ struct trace_iterator { struct trace_array *tr; struct tracer *trace; void *private; - long last_overrun[NR_CPUS]; - long overrun[NR_CPUS]; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; int cpu; - - struct trace_entry *prev_ent; - int prev_cpu; + u64 ts; unsigned long iter_flags; loff_t pos; - unsigned long next_idx[NR_CPUS]; - struct list_head *next_page[NR_CPUS]; - unsigned next_page_idx[NR_CPUS]; long idx; }; void trace_wake_up(void); -void tracing_reset(struct trace_array_cpu *data); +void tracing_reset(struct trace_array *tr, int cpu); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index d5c9e2e4a9c..3657eec6b87 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -34,7 +34,7 @@ static void boot_trace_init(struct trace_array *tr) trace_boot_enabled = 0; for_each_cpu_mask(cpu, cpu_possible_map) - tracing_reset(tr->data[cpu]); + tracing_reset(tr, cpu); } static void boot_trace_ctrl_update(struct trace_array *tr) @@ -74,6 +74,7 @@ struct tracer boot_tracer __read_mostly = void trace_boot(struct boot_trace *it) { + struct ring_buffer_event *event; struct trace_entry *entry; struct trace_array_cpu *data; unsigned long irq_flags; @@ -85,17 +86,18 @@ void trace_boot(struct boot_trace *it) preempt_disable(); data = tr->data[smp_processor_id()]; - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, 0); entry->type = TRACE_BOOT; entry->field.initcall = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); trace_wake_up(); + out: preempt_enable(); } diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 31214489797..e90eb0c2c56 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr) tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) - tracing_reset(tr->data[cpu]); + tracing_reset(tr, cpu); } static void start_function_trace(struct trace_array *tr) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index ece6cfb649f..37ad49407f2 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -173,7 +173,7 @@ out_unlock: out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - tracing_reset(data); + tracing_reset(tr, cpu); trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); } @@ -203,7 +203,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); data->critical_start = parent_ip ? : ip; - tracing_reset(data); + tracing_reset(tr, cpu); local_save_flags(flags); @@ -234,7 +234,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!head_page(data)) || + if (unlikely(!data) || !data->critical_start || atomic_read(&data->disabled)) return; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index a108c326f36..bdbf09d8413 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr) tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) - tracing_reset(tr->data[cpu]); + tracing_reset(tr, cpu); } static void mmio_trace_init(struct trace_array *tr) @@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter) { int cpu; unsigned long cnt = 0; +/* FIXME: */ +#if 0 for_each_online_cpu(cpu) { cnt += iter->overrun[cpu]; iter->overrun[cpu] = 0; } +#endif + (void)cpu; return cnt; } @@ -176,7 +180,7 @@ static int mmio_print_rw(struct trace_iterator *iter) struct trace_entry *entry = iter->ent; struct mmiotrace_rw *rw = &entry->field.mmiorw; struct trace_seq *s = &iter->seq; - unsigned long long t = ns2usecs(entry->field.t); + unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); unsigned secs = (unsigned long)t; int ret = 1; @@ -218,7 +222,7 @@ static int mmio_print_map(struct trace_iterator *iter) struct trace_entry *entry = iter->ent; struct mmiotrace_map *m = &entry->field.mmiomap; struct trace_seq *s = &iter->seq; - unsigned long long t = ns2usecs(entry->field.t); + unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); unsigned secs = (unsigned long)t; int ret = 1; @@ -250,7 +254,7 @@ static int mmio_print_mark(struct trace_iterator *iter) struct trace_entry *entry = iter->ent; const char *msg = entry->field.print.buf; struct trace_seq *s = &iter->seq; - unsigned long long t = ns2usecs(entry->field.t); + unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); unsigned secs = (unsigned long)t; int ret; @@ -303,19 +307,19 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_rw *rw) { + struct ring_buffer_event *event; struct trace_entry *entry; unsigned long irq_flags; - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_RW; entry->field.mmiorw = *rw; - - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); } @@ -331,19 +335,19 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_map *map) { + struct ring_buffer_event *event; struct trace_entry *entry; unsigned long irq_flags; - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - - entry = tracing_get_trace_entry(tr, data); + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_MAP; entry->field.mmiomap = *map; - - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); } diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 16c9ba060ba..4592b486251 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -30,7 +30,7 @@ static void nop_trace_init(struct trace_array *tr) ctx_trace = tr; for_each_online_cpu(cpu) - tracing_reset(tr->data[cpu]); + tracing_reset(tr, cpu); if (tr->ctrl) start_nop_trace(tr); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 789e927abc9..e0b06db0f7a 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -81,7 +81,7 @@ static void sched_switch_reset(struct trace_array *tr) tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) - tracing_reset(tr->data[cpu]); + tracing_reset(tr, cpu); } static int tracing_sched_register(void) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 08206b4e29c..01e75e0639b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -191,7 +191,7 @@ static void __wakeup_reset(struct trace_array *tr) for_each_possible_cpu(cpu) { data = tr->data[cpu]; - tracing_reset(data); + tracing_reset(tr, cpu); } wakeup_cpu = -1; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 5ebd4b13549..09cf230d7ec 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -18,58 +18,20 @@ static inline int trace_valid_entry(struct trace_entry *entry) return 0; } -static int -trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) +static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) { - struct trace_entry *entries; - struct page *page; - int idx = 0; - int i; + struct ring_buffer_event *event; + struct trace_entry *entry; - BUG_ON(list_empty(&data->trace_pages)); - page = list_entry(data->trace_pages.next, struct page, lru); - entries = page_address(page); + while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { + entry = ring_buffer_event_data(event); - check_pages(data); - if (head_page(data) != entries) - goto failed; - - /* - * The starting trace buffer always has valid elements, - * if any element exists. - */ - entries = head_page(data); - - for (i = 0; i < tr->entries; i++) { - - if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) { + if (!trace_valid_entry(entry)) { printk(KERN_CONT ".. invalid entry %d ", - entries[idx].type); + entry->type); goto failed; } - - idx++; - if (idx >= ENTRIES_PER_PAGE) { - page = virt_to_page(entries); - if (page->lru.next == &data->trace_pages) { - if (i != tr->entries - 1) { - printk(KERN_CONT ".. entries buffer mismatch"); - goto failed; - } - } else { - page = list_entry(page->lru.next, struct page, lru); - entries = page_address(page); - } - idx = 0; - } - } - - page = virt_to_page(entries); - if (page->lru.next != &data->trace_pages) { - printk(KERN_CONT ".. too many entries"); - goto failed; } - return 0; failed: @@ -91,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) /* Don't allow flipping of max traces now */ raw_local_irq_save(flags); __raw_spin_lock(&ftrace_max_lock); - for_each_possible_cpu(cpu) { - if (!head_page(tr->data[cpu])) - continue; - cnt += tr->data[cpu]->trace_idx; + cnt = ring_buffer_entries(tr->buffer); - ret = trace_test_buffer_cpu(tr, tr->data[cpu]); + for_each_possible_cpu(cpu) { + ret = trace_test_buffer_cpu(tr, cpu); if (ret) break; } diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index db58fb66a13..9587d3bcba5 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr) tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) - tracing_reset(tr->data[cpu]); + tracing_reset(tr, cpu); } static void start_stack_trace(struct trace_array *tr) -- cgit v1.2.3 From 777e208d40d0953efc6fb4ab58590da3f7d8f02d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Sep 2008 23:02:42 -0400 Subject: ftrace: take advantage of variable length entries Now that the underlining ring buffer for ftrace now hold variable length entries, we can take advantage of this by only storing the size of the actual event into the buffer. This happens to increase the number of entries in the buffer dramatically. We can also get rid of the "trace_cont" operation, but I'm keeping that until we have no more users. Some of the ftrace tracers can now change their code to adapt to this new feature. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 439 ++++++++++++++++++++++------------------- kernel/trace/trace.h | 81 ++++---- kernel/trace/trace_boot.c | 13 +- kernel/trace/trace_mmiotrace.c | 31 +-- 4 files changed, 301 insertions(+), 263 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ef80793858b..ed9e47c1881 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -637,9 +637,9 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) pc = preempt_count(); - entry->field.preempt_count = pc & 0xff; - entry->field.pid = (tsk) ? tsk->pid : 0; - entry->field.flags = + entry->preempt_count = pc & 0xff; + entry->pid = (tsk) ? tsk->pid : 0; + entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | @@ -651,7 +651,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct ring_buffer_event *event; - struct trace_entry *entry; + struct ftrace_entry *entry; unsigned long irq_flags; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), @@ -659,10 +659,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, flags); - entry->type = TRACE_FN; - entry->field.fn.ip = ip; - entry->field.fn.parent_ip = parent_ip; + tracing_generic_entry_update(&entry->ent, flags); + entry->ent.type = TRACE_FN; + entry->ip = ip; + entry->parent_ip = parent_ip; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } @@ -680,7 +680,7 @@ void __trace_stack(struct trace_array *tr, int skip) { struct ring_buffer_event *event; - struct trace_entry *entry; + struct stack_entry *entry; struct stack_trace trace; unsigned long irq_flags; @@ -692,15 +692,15 @@ void __trace_stack(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, flags); - entry->type = TRACE_STACK; + tracing_generic_entry_update(&entry->ent, flags); + entry->ent.type = TRACE_STACK; - memset(&entry->field.stack, 0, sizeof(entry->field.stack)); + memset(&entry->caller, 0, sizeof(entry->caller)); trace.nr_entries = 0; trace.max_entries = FTRACE_STACK_ENTRIES; trace.skip = skip; - trace.entries = entry->field.stack.caller; + trace.entries = entry->caller; save_stack_trace(&trace); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); @@ -713,7 +713,7 @@ __trace_special(void *__tr, void *__data, struct ring_buffer_event *event; struct trace_array_cpu *data = __data; struct trace_array *tr = __tr; - struct trace_entry *entry; + struct special_entry *entry; unsigned long irq_flags; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), @@ -721,11 +721,11 @@ __trace_special(void *__tr, void *__data, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_SPECIAL; - entry->field.special.arg1 = arg1; - entry->field.special.arg2 = arg2; - entry->field.special.arg3 = arg3; + tracing_generic_entry_update(&entry->ent, 0); + entry->ent.type = TRACE_SPECIAL; + entry->arg1 = arg1; + entry->arg2 = arg2; + entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); __trace_stack(tr, data, irq_flags, 4); @@ -740,7 +740,7 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags) { struct ring_buffer_event *event; - struct trace_entry *entry; + struct ctx_switch_entry *entry; unsigned long irq_flags; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), @@ -748,15 +748,15 @@ tracing_sched_switch_trace(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, flags); - entry->type = TRACE_CTX; - entry->field.ctx.prev_pid = prev->pid; - entry->field.ctx.prev_prio = prev->prio; - entry->field.ctx.prev_state = prev->state; - entry->field.ctx.next_pid = next->pid; - entry->field.ctx.next_prio = next->prio; - entry->field.ctx.next_state = next->state; - entry->field.ctx.next_cpu = task_cpu(next); + tracing_generic_entry_update(&entry->ent, flags); + entry->ent.type = TRACE_CTX; + entry->prev_pid = prev->pid; + entry->prev_prio = prev->prio; + entry->prev_state = prev->state; + entry->next_pid = next->pid; + entry->next_prio = next->prio; + entry->next_state = next->state; + entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); __trace_stack(tr, data, flags, 5); } @@ -769,7 +769,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, unsigned long flags) { struct ring_buffer_event *event; - struct trace_entry *entry; + struct ctx_switch_entry *entry; unsigned long irq_flags; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), @@ -777,15 +777,15 @@ tracing_sched_wakeup_trace(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, flags); - entry->type = TRACE_WAKE; - entry->field.ctx.prev_pid = curr->pid; - entry->field.ctx.prev_prio = curr->prio; - entry->field.ctx.prev_state = curr->state; - entry->field.ctx.next_pid = wakee->pid; - entry->field.ctx.next_prio = wakee->prio; - entry->field.ctx.next_state = wakee->state; - entry->field.ctx.next_cpu = task_cpu(wakee); + tracing_generic_entry_update(&entry->ent, flags); + entry->ent.type = TRACE_WAKE; + entry->prev_pid = curr->pid; + entry->prev_prio = curr->prio; + entry->prev_state = curr->state; + entry->next_pid = wakee->pid; + entry->next_prio = wakee->prio; + entry->next_state = wakee->state; + entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); __trace_stack(tr, data, flags, 6); @@ -1173,20 +1173,19 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) static void lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { - struct trace_field *field = &entry->field; int hardirq, softirq; char *comm; - comm = trace_find_cmdline(field->pid); + comm = trace_find_cmdline(entry->pid); - trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid); + trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); trace_seq_printf(s, "%3d", cpu); trace_seq_printf(s, "%c%c", - (field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', - ((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); - hardirq = field->flags & TRACE_FLAG_HARDIRQ; - softirq = field->flags & TRACE_FLAG_SOFTIRQ; + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; if (hardirq && softirq) { trace_seq_putc(s, 'H'); } else { @@ -1200,8 +1199,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) } } - if (field->preempt_count) - trace_seq_printf(s, "%x", field->preempt_count); + if (entry->preempt_count) + trace_seq_printf(s, "%x", entry->preempt_count); else trace_seq_puts(s, "."); } @@ -1230,6 +1229,7 @@ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) { struct trace_entry *ent; + struct trace_field_cont *cont; bool ok = true; ent = peek_next_entry(iter, iter->cpu, NULL); @@ -1239,8 +1239,9 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) } do { + cont = (struct trace_field_cont *)ent; if (ok) - ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0); + ok = (trace_seq_printf(s, "%s", cont->buf) > 0); ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); ent = peek_next_entry(iter, iter->cpu, NULL); } while (ent && ent->type == TRACE_CONT); @@ -1257,7 +1258,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) struct trace_entry *next_entry; unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); struct trace_entry *entry = iter->ent; - struct trace_field *field = &entry->field; unsigned long abs_usecs; unsigned long rel_usecs; u64 next_ts; @@ -1276,12 +1276,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); if (verbose) { - comm = trace_find_cmdline(field->pid); + comm = trace_find_cmdline(entry->pid); trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, - field->pid, cpu, field->flags, - field->preempt_count, trace_idx, + entry->pid, cpu, entry->flags, + entry->preempt_count, trace_idx, ns2usecs(iter->ts), abs_usecs/1000, abs_usecs % 1000, rel_usecs/1000, @@ -1291,53 +1291,69 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) lat_print_timestamp(s, abs_usecs, rel_usecs); } switch (entry->type) { - case TRACE_FN: - seq_print_ip_sym(s, field->fn.ip, sym_flags); + case TRACE_FN: { + struct ftrace_entry *field = (struct ftrace_entry *)entry; + + seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_puts(s, " ("); - if (kretprobed(field->fn.parent_ip)) + if (kretprobed(field->parent_ip)) trace_seq_puts(s, KRETPROBE_MSG); else - seq_print_ip_sym(s, field->fn.parent_ip, sym_flags); + seq_print_ip_sym(s, field->parent_ip, sym_flags); trace_seq_puts(s, ")\n"); break; + } case TRACE_CTX: - case TRACE_WAKE: - T = field->ctx.next_state < sizeof(state_to_char) ? - state_to_char[field->ctx.next_state] : 'X'; + case TRACE_WAKE: { + struct ctx_switch_entry *field = + (struct ctx_switch_entry *)entry; + + T = field->next_state < sizeof(state_to_char) ? + state_to_char[field->next_state] : 'X'; - state = field->ctx.prev_state ? - __ffs(field->ctx.prev_state) + 1 : 0; + state = field->prev_state ? + __ffs(field->prev_state) + 1 : 0; S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; - comm = trace_find_cmdline(field->ctx.next_pid); + comm = trace_find_cmdline(field->next_pid); trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", - field->ctx.prev_pid, - field->ctx.prev_prio, + field->prev_pid, + field->prev_prio, S, entry->type == TRACE_CTX ? "==>" : " +", - field->ctx.next_cpu, - field->ctx.next_pid, - field->ctx.next_prio, + field->next_cpu, + field->next_pid, + field->next_prio, T, comm); break; - case TRACE_SPECIAL: + } + case TRACE_SPECIAL: { + struct special_entry *field = (struct special_entry *)entry; + trace_seq_printf(s, "# %ld %ld %ld\n", - field->special.arg1, - field->special.arg2, - field->special.arg3); + field->arg1, + field->arg2, + field->arg3); break; - case TRACE_STACK: + } + case TRACE_STACK: { + struct stack_entry *field = (struct stack_entry *)entry; + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { if (i) trace_seq_puts(s, " <= "); - seq_print_ip_sym(s, field->stack.caller[i], sym_flags); + seq_print_ip_sym(s, field->caller[i], sym_flags); } trace_seq_puts(s, "\n"); break; - case TRACE_PRINT: - seq_print_ip_sym(s, field->print.ip, sym_flags); - trace_seq_printf(s, ": %s", field->print.buf); - if (field->flags & TRACE_FLAG_CONT) + } + case TRACE_PRINT: { + struct print_entry *field = (struct print_entry *)entry; + + seq_print_ip_sym(s, field->ip, sym_flags); + trace_seq_printf(s, ": %s", field->buf); + if (entry->flags & TRACE_FLAG_CONT) trace_seq_print_cont(s, iter); break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1349,7 +1365,6 @@ static int print_trace_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; - struct trace_field *field; unsigned long usec_rem; unsigned long long t; unsigned long secs; @@ -1363,15 +1378,13 @@ static int print_trace_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return 1; - field = &entry->field; - - comm = trace_find_cmdline(iter->ent->field.pid); + comm = trace_find_cmdline(iter->ent->pid); t = ns2usecs(iter->ts); usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid); + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); if (!ret) return 0; ret = trace_seq_printf(s, "[%03d] ", iter->cpu); @@ -1382,20 +1395,22 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; switch (entry->type) { - case TRACE_FN: - ret = seq_print_ip_sym(s, field->fn.ip, sym_flags); + case TRACE_FN: { + struct ftrace_entry *field = (struct ftrace_entry *)entry; + + ret = seq_print_ip_sym(s, field->ip, sym_flags); if (!ret) return 0; if ((sym_flags & TRACE_ITER_PRINT_PARENT) && - field->fn.parent_ip) { + field->parent_ip) { ret = trace_seq_printf(s, " <-"); if (!ret) return 0; - if (kretprobed(field->fn.parent_ip)) + if (kretprobed(field->parent_ip)) ret = trace_seq_puts(s, KRETPROBE_MSG); else ret = seq_print_ip_sym(s, - field->fn.parent_ip, + field->parent_ip, sym_flags); if (!ret) return 0; @@ -1404,40 +1419,50 @@ static int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + } case TRACE_CTX: - case TRACE_WAKE: - S = field->ctx.prev_state < sizeof(state_to_char) ? - state_to_char[field->ctx.prev_state] : 'X'; - T = field->ctx.next_state < sizeof(state_to_char) ? - state_to_char[field->ctx.next_state] : 'X'; + case TRACE_WAKE: { + struct ctx_switch_entry *field = + (struct ctx_switch_entry *)entry; + + S = field->prev_state < sizeof(state_to_char) ? + state_to_char[field->prev_state] : 'X'; + T = field->next_state < sizeof(state_to_char) ? + state_to_char[field->next_state] : 'X'; ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n", - field->ctx.prev_pid, - field->ctx.prev_prio, + field->prev_pid, + field->prev_prio, S, entry->type == TRACE_CTX ? "==>" : " +", - field->ctx.next_cpu, - field->ctx.next_pid, - field->ctx.next_prio, + field->next_cpu, + field->next_pid, + field->next_prio, T); if (!ret) return 0; break; - case TRACE_SPECIAL: + } + case TRACE_SPECIAL: { + struct special_entry *field = (struct special_entry *)entry; + ret = trace_seq_printf(s, "# %ld %ld %ld\n", - field->special.arg1, - field->special.arg2, - field->special.arg3); + field->arg1, + field->arg2, + field->arg3); if (!ret) return 0; break; - case TRACE_STACK: + } + case TRACE_STACK: { + struct stack_entry *field = (struct stack_entry *)entry; + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { if (i) { ret = trace_seq_puts(s, " <= "); if (!ret) return 0; } - ret = seq_print_ip_sym(s, field->stack.caller[i], + ret = seq_print_ip_sym(s, field->caller[i], sym_flags); if (!ret) return 0; @@ -1446,13 +1471,17 @@ static int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; - case TRACE_PRINT: - seq_print_ip_sym(s, field->print.ip, sym_flags); - trace_seq_printf(s, ": %s", field->print.buf); - if (field->flags & TRACE_FLAG_CONT) + } + case TRACE_PRINT: { + struct print_entry *field = (struct print_entry *)entry; + + seq_print_ip_sym(s, field->ip, sym_flags); + trace_seq_printf(s, ": %s", field->buf); + if (entry->flags & TRACE_FLAG_CONT) trace_seq_print_cont(s, iter); break; } + } return 1; } @@ -1460,7 +1489,6 @@ static int print_raw_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; - struct trace_field *field; int ret; int S, T; @@ -1469,56 +1497,66 @@ static int print_raw_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return 1; - field = &entry->field; - ret = trace_seq_printf(s, "%d %d %llu ", - field->pid, iter->cpu, iter->ts); + entry->pid, iter->cpu, iter->ts); if (!ret) return 0; switch (entry->type) { - case TRACE_FN: + case TRACE_FN: { + struct ftrace_entry *field = (struct ftrace_entry *)entry; + ret = trace_seq_printf(s, "%x %x\n", - field->fn.ip, - field->fn.parent_ip); + field->ip, + field->parent_ip); if (!ret) return 0; break; + } case TRACE_CTX: - case TRACE_WAKE: - S = field->ctx.prev_state < sizeof(state_to_char) ? - state_to_char[field->ctx.prev_state] : 'X'; - T = field->ctx.next_state < sizeof(state_to_char) ? - state_to_char[field->ctx.next_state] : 'X'; + case TRACE_WAKE: { + struct ctx_switch_entry *field = + (struct ctx_switch_entry *)entry; + + S = field->prev_state < sizeof(state_to_char) ? + state_to_char[field->prev_state] : 'X'; + T = field->next_state < sizeof(state_to_char) ? + state_to_char[field->next_state] : 'X'; if (entry->type == TRACE_WAKE) S = '+'; ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n", - field->ctx.prev_pid, - field->ctx.prev_prio, + field->prev_pid, + field->prev_prio, S, - field->ctx.next_cpu, - field->ctx.next_pid, - field->ctx.next_prio, + field->next_cpu, + field->next_pid, + field->next_prio, T); if (!ret) return 0; break; + } case TRACE_SPECIAL: - case TRACE_STACK: + case TRACE_STACK: { + struct special_entry *field = (struct special_entry *)entry; + ret = trace_seq_printf(s, "# %ld %ld %ld\n", - field->special.arg1, - field->special.arg2, - field->special.arg3); + field->arg1, + field->arg2, + field->arg3); if (!ret) return 0; break; - case TRACE_PRINT: - trace_seq_printf(s, "# %lx %s", - field->print.ip, field->print.buf); - if (field->flags & TRACE_FLAG_CONT) + } + case TRACE_PRINT: { + struct print_entry *field = (struct print_entry *)entry; + + trace_seq_printf(s, "# %lx %s", field->ip, field->buf); + if (entry->flags & TRACE_FLAG_CONT) trace_seq_print_cont(s, iter); break; } + } return 1; } @@ -1539,7 +1577,6 @@ static int print_hex_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; struct trace_entry *entry; - struct trace_field *field; int S, T; entry = iter->ent; @@ -1547,40 +1584,48 @@ static int print_hex_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return 1; - field = &entry->field; - - SEQ_PUT_HEX_FIELD_RET(s, field->pid); + SEQ_PUT_HEX_FIELD_RET(s, entry->pid); SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); SEQ_PUT_HEX_FIELD_RET(s, iter->ts); switch (entry->type) { - case TRACE_FN: - SEQ_PUT_HEX_FIELD_RET(s, field->fn.ip); - SEQ_PUT_HEX_FIELD_RET(s, field->fn.parent_ip); + case TRACE_FN: { + struct ftrace_entry *field = (struct ftrace_entry *)entry; + + SEQ_PUT_HEX_FIELD_RET(s, field->ip); + SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); break; + } case TRACE_CTX: - case TRACE_WAKE: - S = field->ctx.prev_state < sizeof(state_to_char) ? - state_to_char[field->ctx.prev_state] : 'X'; - T = field->ctx.next_state < sizeof(state_to_char) ? - state_to_char[field->ctx.next_state] : 'X'; + case TRACE_WAKE: { + struct ctx_switch_entry *field = + (struct ctx_switch_entry *)entry; + + S = field->prev_state < sizeof(state_to_char) ? + state_to_char[field->prev_state] : 'X'; + T = field->next_state < sizeof(state_to_char) ? + state_to_char[field->next_state] : 'X'; if (entry->type == TRACE_WAKE) S = '+'; - SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid); - SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio); + SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); + SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); SEQ_PUT_HEX_FIELD_RET(s, S); - SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_cpu); - SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid); - SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio); + SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu); + SEQ_PUT_HEX_FIELD_RET(s, field->next_pid); + SEQ_PUT_HEX_FIELD_RET(s, field->next_prio); SEQ_PUT_HEX_FIELD_RET(s, T); break; + } case TRACE_SPECIAL: - case TRACE_STACK: - SEQ_PUT_HEX_FIELD_RET(s, field->special.arg1); - SEQ_PUT_HEX_FIELD_RET(s, field->special.arg2); - SEQ_PUT_HEX_FIELD_RET(s, field->special.arg3); + case TRACE_STACK: { + struct special_entry *field = (struct special_entry *)entry; + + SEQ_PUT_HEX_FIELD_RET(s, field->arg1); + SEQ_PUT_HEX_FIELD_RET(s, field->arg2); + SEQ_PUT_HEX_FIELD_RET(s, field->arg3); break; } + } SEQ_PUT_FIELD_RET(s, newline); return 1; @@ -1590,39 +1635,46 @@ static int print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; - struct trace_field *field; entry = iter->ent; if (entry->type == TRACE_CONT) return 1; - field = &entry->field; - - SEQ_PUT_FIELD_RET(s, field->pid); - SEQ_PUT_FIELD_RET(s, field->cpu); + SEQ_PUT_FIELD_RET(s, entry->pid); + SEQ_PUT_FIELD_RET(s, iter->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); switch (entry->type) { - case TRACE_FN: - SEQ_PUT_FIELD_RET(s, field->fn.ip); - SEQ_PUT_FIELD_RET(s, field->fn.parent_ip); + case TRACE_FN: { + struct ftrace_entry *field = (struct ftrace_entry *)entry; + + SEQ_PUT_FIELD_RET(s, field->ip); + SEQ_PUT_FIELD_RET(s, field->parent_ip); break; - case TRACE_CTX: - SEQ_PUT_FIELD_RET(s, field->ctx.prev_pid); - SEQ_PUT_FIELD_RET(s, field->ctx.prev_prio); - SEQ_PUT_FIELD_RET(s, field->ctx.prev_state); - SEQ_PUT_FIELD_RET(s, field->ctx.next_pid); - SEQ_PUT_FIELD_RET(s, field->ctx.next_prio); - SEQ_PUT_FIELD_RET(s, field->ctx.next_state); + } + case TRACE_CTX: { + struct ctx_switch_entry *field = + (struct ctx_switch_entry *)entry; + + SEQ_PUT_FIELD_RET(s, field->prev_pid); + SEQ_PUT_FIELD_RET(s, field->prev_prio); + SEQ_PUT_FIELD_RET(s, field->prev_state); + SEQ_PUT_FIELD_RET(s, field->next_pid); + SEQ_PUT_FIELD_RET(s, field->next_prio); + SEQ_PUT_FIELD_RET(s, field->next_state); break; + } case TRACE_SPECIAL: - case TRACE_STACK: - SEQ_PUT_FIELD_RET(s, field->special.arg1); - SEQ_PUT_FIELD_RET(s, field->special.arg2); - SEQ_PUT_FIELD_RET(s, field->special.arg3); + case TRACE_STACK: { + struct special_entry *field = (struct special_entry *)entry; + + SEQ_PUT_FIELD_RET(s, field->arg1); + SEQ_PUT_FIELD_RET(s, field->arg2); + SEQ_PUT_FIELD_RET(s, field->arg3); break; } + } return 1; } @@ -2818,10 +2870,10 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - struct trace_entry *entry; + struct print_entry *entry; unsigned long flags, irq_flags; long disabled; - int cpu, len = 0, write, written = 0; + int cpu, len = 0, size; if (!tr->ctrl || tracing_disabled) return 0; @@ -2840,40 +2892,19 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) len = min(len, TRACE_BUF_SIZE-1); trace_buf[len] = 0; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + size = sizeof(*entry) + len + 1; + event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags); if (!event) goto out_unlock; - entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, flags); - entry->type = TRACE_PRINT; - entry->field.print.ip = ip; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags); + entry->ent.type = TRACE_PRINT; + entry->ip = ip; - write = min(len, (int)(TRACE_PRINT_BUF_SIZE-1)); - - memcpy(&entry->field.print.buf, trace_buf, write); - entry->field.print.buf[write] = 0; - written = write; + memcpy(&entry->buf, trace_buf, len); + entry->buf[len] = 0; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - if (written != len) - entry->field.flags |= TRACE_FLAG_CONT; - - while (written != len) { - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); - if (!event) - goto out_unlock; - entry = ring_buffer_event_data(event); - - entry->type = TRACE_CONT; - write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1)); - memcpy(&entry->cont.buf, trace_buf+written, write); - entry->cont.buf[write] = 0; - written += write; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - } - out_unlock: spin_unlock(&trace_buf_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f6965f775b4..e541a6b7e31 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -26,10 +26,25 @@ enum trace_type { __TRACE_LAST_TYPE }; +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + unsigned char type; + unsigned char cpu; + unsigned char flags; + unsigned char preempt_count; + int pid; +}; + /* * Function trace entry - function address and parent function addres: */ struct ftrace_entry { + struct trace_entry ent; unsigned long ip; unsigned long parent_ip; }; @@ -39,6 +54,7 @@ extern struct tracer boot_tracer; * Context switch trace entry - which task (and prio) we switched from/to: */ struct ctx_switch_entry { + struct trace_entry ent; unsigned int prev_pid; unsigned char prev_prio; unsigned char prev_state; @@ -52,6 +68,7 @@ struct ctx_switch_entry { * Special (free-form) trace entry: */ struct special_entry { + struct trace_entry ent; unsigned long arg1; unsigned long arg2; unsigned long arg3; @@ -64,6 +81,7 @@ struct special_entry { #define FTRACE_STACK_ENTRIES 8 struct stack_entry { + struct trace_entry ent; unsigned long caller[FTRACE_STACK_ENTRIES]; }; @@ -71,10 +89,34 @@ struct stack_entry { * ftrace_printk entry: */ struct print_entry { + struct trace_entry ent; unsigned long ip; char buf[]; }; +#define TRACE_OLD_SIZE 88 + +struct trace_field_cont { + unsigned char type; + /* Temporary till we get rid of this completely */ + char buf[TRACE_OLD_SIZE - 1]; +}; + +struct trace_mmiotrace_rw { + struct trace_entry ent; + struct mmiotrace_rw rw; +}; + +struct trace_mmiotrace_map { + struct trace_entry ent; + struct mmiotrace_map map; +}; + +struct trace_boot { + struct trace_entry ent; + struct boot_trace initcall; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -92,46 +134,7 @@ enum trace_flag_type { TRACE_FLAG_CONT = 0x10, }; -/* - * The trace field - the most basic unit of tracing. This is what - * is printed in the end as a single line in the trace output, such as: - * - * bash-15816 [01] 235.197585: idle_cpu <- irq_enter - */ -struct trace_field { - char cpu; - char flags; - char preempt_count; - int pid; - union { - struct ftrace_entry fn; - struct ctx_switch_entry ctx; - struct special_entry special; - struct stack_entry stack; - struct print_entry print; - struct mmiotrace_rw mmiorw; - struct mmiotrace_map mmiomap; - struct boot_trace initcall; - }; -}; - -struct trace_field_cont { - char buf[sizeof(struct trace_field)]; -}; - -struct trace_entry { - char type; - union { - struct trace_field field; - struct trace_field_cont cont; - }; -}; - -#define TRACE_ENTRY_SIZE sizeof(struct trace_entry) #define TRACE_BUF_SIZE 1024 -#define TRACE_PRINT_BUF_SIZE \ - (sizeof(struct trace_field) - offsetof(struct trace_field, print.buf)) -#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field) /* * The CPU trace array - it consists of thousands of trace entries diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 3657eec6b87..fa8cca1be11 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -49,10 +49,11 @@ static int initcall_print_line(struct trace_iterator *iter) { int ret = 0; struct trace_entry *entry = iter->ent; - struct boot_trace *it = &entry->field.initcall; + struct trace_boot *field = (struct trace_boot *)entry; + struct boot_trace *it = &field->initcall; struct trace_seq *s = &iter->seq; - if (iter->ent->type == TRACE_BOOT) + if (entry->type == TRACE_BOOT) ret = trace_seq_printf(s, "%pF called from %i " "returned %d after %lld msecs\n", it->func, it->caller, it->result, @@ -75,7 +76,7 @@ struct tracer boot_tracer __read_mostly = void trace_boot(struct boot_trace *it) { struct ring_buffer_event *event; - struct trace_entry *entry; + struct trace_boot *entry; struct trace_array_cpu *data; unsigned long irq_flags; struct trace_array *tr = boot_trace; @@ -91,9 +92,9 @@ void trace_boot(struct boot_trace *it) if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_BOOT; - entry->field.initcall = *it; + tracing_generic_entry_update(&entry->ent, 0); + entry->ent.type = TRACE_BOOT; + entry->initcall = *it; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index bdbf09d8413..3df441ea274 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -178,14 +178,16 @@ print_out: static int mmio_print_rw(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; - struct mmiotrace_rw *rw = &entry->field.mmiorw; + struct trace_mmiotrace_rw *field = + (struct trace_mmiotrace_rw *)entry; + struct mmiotrace_rw *rw = &field->rw; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); unsigned secs = (unsigned long)t; int ret = 1; - switch (entry->field.mmiorw.opcode) { + switch (rw->opcode) { case MMIO_READ: ret = trace_seq_printf(s, "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", @@ -220,14 +222,14 @@ static int mmio_print_rw(struct trace_iterator *iter) static int mmio_print_map(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; - struct mmiotrace_map *m = &entry->field.mmiomap; + struct mmiotrace_map *m = (struct mmiotrace_map *)entry; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); unsigned secs = (unsigned long)t; int ret = 1; - switch (entry->field.mmiorw.opcode) { + switch (m->opcode) { case MMIO_PROBE: ret = trace_seq_printf(s, "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", @@ -252,7 +254,8 @@ static int mmio_print_map(struct trace_iterator *iter) static int mmio_print_mark(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; - const char *msg = entry->field.print.buf; + struct print_entry *print = (struct print_entry *)entry; + const char *msg = print->buf; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); @@ -264,7 +267,7 @@ static int mmio_print_mark(struct trace_iterator *iter) if (!ret) return 0; - if (entry->field.flags & TRACE_FLAG_CONT) + if (entry->flags & TRACE_FLAG_CONT) trace_seq_print_cont(s, iter); return 1; @@ -308,7 +311,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct mmiotrace_rw *rw) { struct ring_buffer_event *event; - struct trace_entry *entry; + struct trace_mmiotrace_rw *entry; unsigned long irq_flags; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), @@ -316,9 +319,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_MMIO_RW; - entry->field.mmiorw = *rw; + tracing_generic_entry_update(&entry->ent, 0); + entry->ent.type = TRACE_MMIO_RW; + entry->rw = *rw; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); @@ -336,7 +339,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct mmiotrace_map *map) { struct ring_buffer_event *event; - struct trace_entry *entry; + struct trace_mmiotrace_map *entry; unsigned long irq_flags; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), @@ -344,9 +347,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_MMIO_MAP; - entry->field.mmiomap = *map; + tracing_generic_entry_update(&entry->ent, 0); + entry->ent.type = TRACE_MMIO_MAP; + entry->map = *map; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); -- cgit v1.2.3 From 2c4f035f6c3e8fda661eb6105aa51ef07aa71607 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Sep 2008 20:18:34 +0200 Subject: tracing/ftrace: change the type of the print_line callback We need a kind of disambiguation when a print_line callback returns 0. _There is not enough space to print all the entry. Please flush the seq and retry. _I can't handle this type of entry This patch changes the type of this callback for better information. Also some changes have been made in this V2. _ Only relay to default functions after the print_line callback fails. _ This patch doesn't fix the issue with the broken pipe (see patch 2/4 for that) Some things are still in discussion: _ Find better names for the enum print_line_t values _ Change the type of print_trace_line into boolean. Patches to change that can be sent later. Signed-off-by: Frederic Weisbecker Acked-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 77 ++++++++++++++++++++++++++++------------------------ kernel/trace/trace.h | 10 ++++++- 2 files changed, 50 insertions(+), 37 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ed9e47c1881..b38a4bb4054 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1250,7 +1250,7 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) trace_seq_putc(s, '\n'); } -static int +static enum print_line_t print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { struct trace_seq *s = &iter->seq; @@ -1267,7 +1267,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) unsigned state; if (entry->type == TRACE_CONT) - return 1; + return TRACE_TYPE_HANDLED; next_entry = find_next_entry(iter, NULL, &next_ts); if (!next_entry) @@ -1357,10 +1357,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } - return 1; + return TRACE_TYPE_HANDLED; } -static int print_trace_fmt(struct trace_iterator *iter) +static enum print_line_t print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1376,7 +1376,7 @@ static int print_trace_fmt(struct trace_iterator *iter) entry = iter->ent; if (entry->type == TRACE_CONT) - return 1; + return TRACE_TYPE_HANDLED; comm = trace_find_cmdline(iter->ent->pid); @@ -1386,13 +1386,13 @@ static int print_trace_fmt(struct trace_iterator *iter) ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; ret = trace_seq_printf(s, "[%03d] ", iter->cpu); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; switch (entry->type) { case TRACE_FN: { @@ -1400,12 +1400,12 @@ static int print_trace_fmt(struct trace_iterator *iter) ret = seq_print_ip_sym(s, field->ip, sym_flags); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; if ((sym_flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { ret = trace_seq_printf(s, " <-"); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; if (kretprobed(field->parent_ip)) ret = trace_seq_puts(s, KRETPROBE_MSG); else @@ -1413,11 +1413,11 @@ static int print_trace_fmt(struct trace_iterator *iter) field->parent_ip, sym_flags); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; } ret = trace_seq_printf(s, "\n"); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; break; } case TRACE_CTX: @@ -1439,7 +1439,7 @@ static int print_trace_fmt(struct trace_iterator *iter) field->next_prio, T); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; break; } case TRACE_SPECIAL: { @@ -1450,7 +1450,7 @@ static int print_trace_fmt(struct trace_iterator *iter) field->arg2, field->arg3); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; break; } case TRACE_STACK: { @@ -1460,16 +1460,16 @@ static int print_trace_fmt(struct trace_iterator *iter) if (i) { ret = trace_seq_puts(s, " <= "); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; } ret = seq_print_ip_sym(s, field->caller[i], sym_flags); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; } ret = trace_seq_puts(s, "\n"); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; break; } case TRACE_PRINT: { @@ -1482,10 +1482,10 @@ static int print_trace_fmt(struct trace_iterator *iter) break; } } - return 1; + return TRACE_TYPE_HANDLED; } -static int print_raw_fmt(struct trace_iterator *iter) +static enum print_line_t print_raw_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; @@ -1495,12 +1495,12 @@ static int print_raw_fmt(struct trace_iterator *iter) entry = iter->ent; if (entry->type == TRACE_CONT) - return 1; + return TRACE_TYPE_HANDLED; ret = trace_seq_printf(s, "%d %d %llu ", entry->pid, iter->cpu, iter->ts); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; switch (entry->type) { case TRACE_FN: { @@ -1510,7 +1510,7 @@ static int print_raw_fmt(struct trace_iterator *iter) field->ip, field->parent_ip); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; break; } case TRACE_CTX: @@ -1533,7 +1533,7 @@ static int print_raw_fmt(struct trace_iterator *iter) field->next_prio, T); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; break; } case TRACE_SPECIAL: @@ -1545,7 +1545,7 @@ static int print_raw_fmt(struct trace_iterator *iter) field->arg2, field->arg3); if (!ret) - return 0; + return TRACE_TYPE_PARTIAL_LINE; break; } case TRACE_PRINT: { @@ -1557,7 +1557,7 @@ static int print_raw_fmt(struct trace_iterator *iter) break; } } - return 1; + return TRACE_TYPE_HANDLED; } #define SEQ_PUT_FIELD_RET(s, x) \ @@ -1572,7 +1572,7 @@ do { \ return 0; \ } while (0) -static int print_hex_fmt(struct trace_iterator *iter) +static enum print_line_t print_hex_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; @@ -1582,7 +1582,7 @@ static int print_hex_fmt(struct trace_iterator *iter) entry = iter->ent; if (entry->type == TRACE_CONT) - return 1; + return TRACE_TYPE_HANDLED; SEQ_PUT_HEX_FIELD_RET(s, entry->pid); SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); @@ -1628,10 +1628,10 @@ static int print_hex_fmt(struct trace_iterator *iter) } SEQ_PUT_FIELD_RET(s, newline); - return 1; + return TRACE_TYPE_HANDLED; } -static int print_bin_fmt(struct trace_iterator *iter) +static enum print_line_t print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; @@ -1639,7 +1639,7 @@ static int print_bin_fmt(struct trace_iterator *iter) entry = iter->ent; if (entry->type == TRACE_CONT) - return 1; + return TRACE_TYPE_HANDLED; SEQ_PUT_FIELD_RET(s, entry->pid); SEQ_PUT_FIELD_RET(s, iter->cpu); @@ -1686,13 +1686,18 @@ static int trace_empty(struct trace_iterator *iter) if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) return 0; } - return 1; + return TRACE_TYPE_HANDLED; } -static int print_trace_line(struct trace_iterator *iter) +static enum print_line_t print_trace_line(struct trace_iterator *iter) { - if (iter->trace && iter->trace->print_line) - return iter->trace->print_line(iter); + enum print_line_t ret; + + if (iter->trace && iter->trace->print_line) { + ret = iter->trace->print_line(iter); + if (ret != TRACE_TYPE_UNHANDLED) + return ret; + } if (trace_flags & TRACE_ITER_BIN) return print_bin_fmt(iter); @@ -2525,11 +2530,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, ring_buffer_lock(iter->tr->buffer, &flags); while (find_next_entry_inc(iter) != NULL) { - int ret; + enum print_line_t ret; int len = iter->seq.len; ret = print_trace_line(iter); - if (!ret) { + if (ret == TRACE_TYPE_PARTIAL_LINE) { /* don't print partial lines */ iter->seq.len = len; break; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e541a6b7e31..a921ba5d292 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -177,6 +177,14 @@ struct trace_array { struct trace_array_cpu *data[NR_CPUS]; }; + +/* Return values for print_line callback */ +enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, + TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ +}; + /* * A specific tracer, represented by methods that operate on a trace array: */ @@ -197,7 +205,7 @@ struct tracer { int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif - int (*print_line)(struct trace_iterator *iter); + enum print_line_t (*print_line)(struct trace_iterator *iter); struct tracer *next; int print_max; }; -- cgit v1.2.3 From 7104f300c5a69b46dda00d898034dd05c9f21739 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Oct 2008 10:52:51 -0400 Subject: ftrace: type cast filter+verifier The mmiotrace map had a bug that would typecast the entry from the trace to the wrong type. That is a known danger of C typecasts, there's absolutely zero checking done on them. Help that problem a bit by using a GCC extension to implement a type filter that restricts the types that a trace record can be cast into, and by adding a dynamic check (in debug mode) to verify the type of the entry. This patch adds a macro to assign all entries of ftrace using the type of the variable and checking the entry id. The typecasts are now done in the macro for only those types that it knows about, which should be all the types that are allowed to be read from the tracer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 85 +++++++++++++++++++++++++++++------------- kernel/trace/trace.h | 42 +++++++++++++++++++++ kernel/trace/trace_mmiotrace.c | 14 +++++-- 3 files changed, 112 insertions(+), 29 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c1634068adf..948f7d821c6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1350,7 +1350,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) } switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_puts(s, " ("); @@ -1363,8 +1365,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); T = field->next_state < sizeof(state_to_char) ? state_to_char[field->next_state] : 'X'; @@ -1384,7 +1387,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; } case TRACE_SPECIAL: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); trace_seq_printf(s, "# %ld %ld %ld\n", field->arg1, @@ -1393,7 +1398,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; } case TRACE_STACK: { - struct stack_entry *field = (struct stack_entry *)entry; + struct stack_entry *field; + + trace_assign_type(field, entry); for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { if (i) @@ -1404,7 +1411,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; } case TRACE_PRINT: { - struct print_entry *field = (struct print_entry *)entry; + struct print_entry *field; + + trace_assign_type(field, entry); seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_printf(s, ": %s", field->buf); @@ -1454,7 +1463,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); ret = seq_print_ip_sym(s, field->ip, sym_flags); if (!ret) @@ -1480,8 +1491,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); S = field->prev_state < sizeof(state_to_char) ? state_to_char[field->prev_state] : 'X'; @@ -1501,7 +1513,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); ret = trace_seq_printf(s, "# %ld %ld %ld\n", field->arg1, @@ -1512,7 +1526,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_STACK: { - struct stack_entry *field = (struct stack_entry *)entry; + struct stack_entry *field; + + trace_assign_type(field, entry); for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { if (i) { @@ -1531,7 +1547,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_PRINT: { - struct print_entry *field = (struct print_entry *)entry; + struct print_entry *field; + + trace_assign_type(field, entry); seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_printf(s, ": %s", field->buf); @@ -1562,7 +1580,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); ret = trace_seq_printf(s, "%x %x\n", field->ip, @@ -1573,8 +1593,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); S = field->prev_state < sizeof(state_to_char) ? state_to_char[field->prev_state] : 'X'; @@ -1596,7 +1617,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) } case TRACE_SPECIAL: case TRACE_STACK: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); ret = trace_seq_printf(s, "# %ld %ld %ld\n", field->arg1, @@ -1607,7 +1630,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_PRINT: { - struct print_entry *field = (struct print_entry *)entry; + struct print_entry *field; + + trace_assign_type(field, entry); trace_seq_printf(s, "# %lx %s", field->ip, field->buf); if (entry->flags & TRACE_FLAG_CONT) @@ -1648,7 +1673,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_HEX_FIELD_RET(s, field->ip); SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); @@ -1656,8 +1683,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) } case TRACE_CTX: case TRACE_WAKE: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); S = field->prev_state < sizeof(state_to_char) ? state_to_char[field->prev_state] : 'X'; @@ -1676,7 +1704,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) } case TRACE_SPECIAL: case TRACE_STACK: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_HEX_FIELD_RET(s, field->arg1); SEQ_PUT_HEX_FIELD_RET(s, field->arg2); @@ -1705,15 +1735,18 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) switch (entry->type) { case TRACE_FN: { - struct ftrace_entry *field = (struct ftrace_entry *)entry; + struct ftrace_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_FIELD_RET(s, field->ip); SEQ_PUT_FIELD_RET(s, field->parent_ip); break; } case TRACE_CTX: { - struct ctx_switch_entry *field = - (struct ctx_switch_entry *)entry; + struct ctx_switch_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_FIELD_RET(s, field->prev_pid); SEQ_PUT_FIELD_RET(s, field->prev_prio); @@ -1725,7 +1758,9 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) } case TRACE_SPECIAL: case TRACE_STACK: { - struct special_entry *field = (struct special_entry *)entry; + struct special_entry *field; + + trace_assign_type(field, entry); SEQ_PUT_FIELD_RET(s, field->arg1); SEQ_PUT_FIELD_RET(s, field->arg2); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a921ba5d292..f02042d0d82 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -177,6 +177,48 @@ struct trace_array { struct trace_array_cpu *data[NR_CPUS]; }; +#define FTRACE_CMP_TYPE(var, type) \ + __builtin_types_compatible_p(typeof(var), type *) + +#undef IF_ASSIGN +#define IF_ASSIGN(var, entry, etype, id) \ + if (FTRACE_CMP_TYPE(var, etype)) { \ + var = (typeof(var))(entry); \ + WARN_ON(id && (entry)->type != id); \ + break; \ + } + +/* Will cause compile errors if type is not found. */ +extern void __ftrace_bad_type(void); + +/* + * The trace_assign_type is a verifier that the entry type is + * the same as the type being assigned. To add new types simply + * add a line with the following format: + * + * IF_ASSIGN(var, ent, type, id); + * + * Where "type" is the trace type that includes the trace_entry + * as the "ent" item. And "id" is the trace identifier that is + * used in the trace_type enum. + * + * If the type can have more than one id, then use zero. + */ +#define trace_assign_type(var, ent) \ + do { \ + IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ + IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ + IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ + IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ + IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct special_entry, 0); \ + IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ + TRACE_MMIO_RW); \ + IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ + TRACE_MMIO_MAP); \ + IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ + __ftrace_bad_type(); \ + } while (0) /* Return values for print_line callback */ enum print_line_t { diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 1a266aa08e1..0e819f47bb7 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -178,15 +178,17 @@ print_out: static enum print_line_t mmio_print_rw(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; - struct trace_mmiotrace_rw *field = - (struct trace_mmiotrace_rw *)entry; - struct mmiotrace_rw *rw = &field->rw; + struct trace_mmiotrace_rw *field; + struct mmiotrace_rw *rw; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); unsigned secs = (unsigned long)t; int ret = 1; + trace_assign_type(field, entry); + rw = &field->rw; + switch (rw->opcode) { case MMIO_READ: ret = trace_seq_printf(s, @@ -222,13 +224,17 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) static enum print_line_t mmio_print_map(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; - struct mmiotrace_map *m = (struct mmiotrace_map *)entry; + struct trace_mmiotrace_map *field; + struct mmiotrace_map *m; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, 1000000ULL); unsigned secs = (unsigned long)t; int ret; + trace_assign_type(field, entry); + m = &field->map; + switch (m->opcode) { case MMIO_PROBE: ret = trace_seq_printf(s, -- cgit v1.2.3 From 38697053fa006411224a1790e2adb8216440ab0f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Oct 2008 13:14:09 -0400 Subject: ftrace: preempt disable over interrupt disable With the new ring buffer infrastructure in ftrace, I'm trying to make ftrace a little more light weight. This patch converts a lot of the local_irq_save/restore into preempt_disable/enable. The original preempt count in a lot of cases has to be sent in as a parameter so that it can be recorded correctly. Some places were recording it incorrectly before anyway. This is also laying the ground work to make ftrace a little bit more reentrant, and remove all locking. The function tracers must still protect from reentrancy. Note: All the function tracers must be careful when using preempt_disable. It must do the following: resched = need_resched(); preempt_disable_notrace(); [...] if (resched) preempt_enable_no_resched_notrace(); else preempt_enable_notrace(); The reason is that if this function traces schedule() itself, the preempt_enable_notrace() will cause a schedule, which will lead us into a recursive failure. If we needed to reschedule before calling preempt_disable, we should have already scheduled. Since we did not, this is most likely that we should not and are probably inside a schedule function. If resched was not set, we still need to catch the need resched flag being set when preemption was off and the if case at the end will catch that for us. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 123 +++++++++++++++++++------------------- kernel/trace/trace.h | 13 ++-- kernel/trace/trace_boot.c | 2 +- kernel/trace/trace_irqsoff.c | 13 ++-- kernel/trace/trace_mmiotrace.c | 4 +- kernel/trace/trace_sched_switch.c | 9 ++- kernel/trace/trace_sched_wakeup.c | 13 +++- 7 files changed, 97 insertions(+), 80 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 948f7d821c6..1cd2e8143bb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -652,12 +652,10 @@ void tracing_record_cmdline(struct task_struct *tsk) } void -tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) +tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, + int pc) { struct task_struct *tsk = current; - unsigned long pc; - - pc = preempt_count(); entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; @@ -670,7 +668,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) void trace_function(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) + unsigned long ip, unsigned long parent_ip, unsigned long flags, + int pc) { struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -685,7 +684,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_FN; entry->ip = ip; entry->parent_ip = parent_ip; @@ -694,16 +693,17 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, void ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) + unsigned long ip, unsigned long parent_ip, unsigned long flags, + int pc) { if (likely(!atomic_read(&data->disabled))) - trace_function(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags, pc); } -void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, - int skip) +static void ftrace_trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc) { struct ring_buffer_event *event; struct stack_entry *entry; @@ -718,7 +718,7 @@ void __trace_stack(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_STACK; memset(&entry->caller, 0, sizeof(entry->caller)); @@ -732,9 +732,18 @@ void __trace_stack(struct trace_array *tr, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) +void __trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip) +{ + ftrace_trace_stack(tr, data, flags, skip, preempt_count()); +} + +static void +ftrace_trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3, + int pc) { struct ring_buffer_event *event; struct trace_array_cpu *data = __data; @@ -747,23 +756,30 @@ __trace_special(void *__tr, void *__data, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0); + tracing_generic_entry_update(&entry->ent, 0, pc); entry->ent.type = TRACE_SPECIAL; entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - __trace_stack(tr, data, irq_flags, 4); + ftrace_trace_stack(tr, data, irq_flags, 4, pc); trace_wake_up(); } +void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); +} + void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, - unsigned long flags) + unsigned long flags, int pc) { struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -774,7 +790,7 @@ tracing_sched_switch_trace(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_CTX; entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; @@ -784,7 +800,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - __trace_stack(tr, data, flags, 5); + ftrace_trace_stack(tr, data, flags, 5, pc); } void @@ -792,7 +808,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *curr, - unsigned long flags) + unsigned long flags, int pc) { struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -803,7 +819,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_WAKE; entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; @@ -813,7 +829,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - __trace_stack(tr, data, flags, 6); + ftrace_trace_stack(tr, data, flags, 6, pc); trace_wake_up(); } @@ -823,23 +839,24 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - unsigned long flags; long disabled; int cpu; + int pc; if (tracing_disabled || !tr->ctrl) return; - local_irq_save(flags); + pc = preempt_count(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - __trace_special(tr, data, arg1, arg2, arg3); + ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); atomic_dec(&data->disabled); - local_irq_restore(flags); + preempt_enable_notrace(); } #ifdef CONFIG_FTRACE @@ -850,7 +867,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; long disabled; - int cpu; + int cpu, resched; + int pc; if (unlikely(!ftrace_function_enabled)) return; @@ -858,16 +876,22 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (skip_trace(ip)) return; - local_irq_save(flags); + pc = preempt_count(); + resched = need_resched(); + preempt_disable_notrace(); + local_save_flags(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); - local_irq_restore(flags); + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); } static struct ftrace_ops trace_ops __read_mostly = @@ -2508,9 +2532,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; -#ifdef CONFIG_FTRACE - int ftrace_save; -#endif ssize_t sret; /* return any leftover data */ @@ -2593,20 +2614,6 @@ waitagain: offsetof(struct trace_iterator, seq)); iter->pos = -1; - /* - * We need to stop all tracing on all CPUS to read the - * the next buffer. This is a bit expensive, but is - * not done often. We fill all what we can read, - * and then release the locks again. - */ - - local_irq_disable(); -#ifdef CONFIG_FTRACE - ftrace_save = ftrace_enabled; - ftrace_enabled = 0; -#endif - smp_wmb(); - while (find_next_entry_inc(iter) != NULL) { enum print_line_t ret; int len = iter->seq.len; @@ -2624,11 +2631,6 @@ waitagain: break; } -#ifdef CONFIG_FTRACE - ftrace_enabled = ftrace_save; -#endif - local_irq_enable(); - /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (iter->seq.readpos >= iter->seq.len) @@ -2960,12 +2962,13 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) struct print_entry *entry; unsigned long flags, irq_flags; long disabled; - int cpu, len = 0, size; + int cpu, len = 0, size, pc; if (!tr->ctrl || tracing_disabled) return 0; - local_irq_save(flags); + pc = preempt_count(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -2973,7 +2976,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) if (unlikely(disabled != 1)) goto out; - spin_lock(&trace_buf_lock); + spin_lock_irqsave(&trace_buf_lock, flags); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -2984,7 +2987,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags); + tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; @@ -2993,11 +2996,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - spin_unlock(&trace_buf_lock); + spin_unlock_irqrestore(&trace_buf_lock, flags); out: atomic_dec(&data->disabled); - local_irq_restore(flags); + preempt_enable_notrace(); return len; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f02042d0d82..f1f99572cde 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -288,35 +288,36 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); void tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags); + unsigned long flags, + int pc); void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, - unsigned long flags); + unsigned long flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, - unsigned long flags); + unsigned long flags, int pc); void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *cur, - unsigned long flags); + unsigned long flags, int pc); void trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, unsigned long arg2, - unsigned long arg3); + unsigned long arg3, int pc); void trace_function(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, - unsigned long flags); + unsigned long flags, int pc); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 43bde20b95b..f2dac6f1cf0 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -95,7 +95,7 @@ void trace_boot(struct boot_trace *it) if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0); + tracing_generic_entry_update(&entry->ent, 0, 0); entry->ent.type = TRACE_BOOT; entry->initcall = *it; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 37ad49407f2..f925dbbff2a 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags, preempt_count()); atomic_dec(&data->disabled); } @@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr, unsigned long latency, t0, t1; cycle_t T0, T1, delta; unsigned long flags; + int pc; /* * usecs conversion is slow so we try to delay the conversion @@ -144,13 +145,15 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out; + pc = preempt_count(); + spin_lock_irqsave(&max_trace_lock, flags); /* check if we are still the max latency */ if (!report_latency(delta)) goto out_unlock; - trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); + trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); latency = nsecs_to_usecs(delta); @@ -174,7 +177,7 @@ out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); tracing_reset(tr, cpu); - trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); + trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); } static inline void @@ -207,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - trace_function(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags, preempt_count()); per_cpu(tracing_cpu, cpu) = 1; @@ -241,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - trace_function(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags, preempt_count()); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 0e819f47bb7..f28484618ff 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -324,7 +324,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0); + tracing_generic_entry_update(&entry->ent, 0, preempt_count()); entry->ent.type = TRACE_MMIO_RW; entry->rw = *rw; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); @@ -352,7 +352,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0); + tracing_generic_entry_update(&entry->ent, 0, preempt_count()); entry->ent.type = TRACE_MMIO_MAP; entry->map = *map; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e0b06db0f7a..c7fa08a5b7f 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -26,6 +26,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, unsigned long flags; long disabled; int cpu; + int pc; if (!atomic_read(&sched_ref)) return; @@ -36,13 +37,14 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, if (!tracer_enabled) return; + pc = preempt_count(); local_irq_save(flags); cpu = raw_smp_processor_id(); data = ctx_trace->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - tracing_sched_switch_trace(ctx_trace, data, prev, next, flags); + tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -54,11 +56,12 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee) struct trace_array_cpu *data; unsigned long flags; long disabled; - int cpu; + int cpu, pc; if (!likely(tracer_enabled)) return; + pc = preempt_count(); tracing_record_cmdline(current); local_irq_save(flags); @@ -68,7 +71,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee) if (likely(disabled == 1)) tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, - flags); + flags, pc); atomic_dec(&data->disabled); local_irq_restore(flags); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 01e75e0639b..fe4a252c236 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) long disabled; int resched; int cpu; + int pc; if (likely(!wakeup_task)) return; + pc = preempt_count(); resched = need_resched(); preempt_disable_notrace(); @@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) if (task_cpu(wakeup_task) != cpu) goto unlock; - trace_function(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags, pc); unlock: __raw_spin_unlock(&wakeup_lock); @@ -121,6 +123,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, unsigned long flags; long disabled; int cpu; + int pc; tracing_record_cmdline(prev); @@ -139,6 +142,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (next != wakeup_task) return; + pc = preempt_count(); + /* The task we are waiting for is waking up */ data = wakeup_trace->data[wakeup_cpu]; @@ -155,7 +160,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags); + trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); /* * usecs conversion is slow so we try to delay the conversion @@ -220,6 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p) int cpu = smp_processor_id(); unsigned long flags; long disabled; + int pc; if (likely(!tracer_enabled)) return; @@ -232,6 +238,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p) p->prio >= current->prio) return; + pc = preempt_count(); disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); if (unlikely(disabled != 1)) goto out; @@ -256,7 +263,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p) wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], - CALLER_ADDR1, CALLER_ADDR2, flags); + CALLER_ADDR1, CALLER_ADDR2, flags, pc); out_locked: __raw_spin_unlock(&wakeup_lock); -- cgit v1.2.3