From 7f85803a26f304e698c673838aab06cc6d4d6e59 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 14 Jan 2011 17:49:02 +0800 Subject: tracing: Remove syscall_exit_fields There is no need for syscall_exit_fields as the syscall exit event class can already host the fields in its structure, like most other trace events do by default. Use that default behavior instead. Following this scheme, we don't need anymore to override the get_fields() callback of the syscall exit event class either. Hence both syscall_exit_fields and syscall_get_exit_fields() can be removed. Also changed some indentation to keep the following under 80 characters: ".fields = LIST_HEAD_INIT(event_class_syscall_exit.fields)," Acked-by: Frederic Weisbecker Signed-off-by: Lai Jiangshan LKML-Reference: <4D301C0E.8090408@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index bac752f0cfb..b706529b4fc 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event, static int syscall_enter_define_fields(struct ftrace_event_call *call); static int syscall_exit_define_fields(struct ftrace_event_call *call); -/* All syscall exit events have the same fields */ -static LIST_HEAD(syscall_exit_fields); - static struct list_head * syscall_get_enter_fields(struct ftrace_event_call *call) { @@ -34,34 +31,28 @@ syscall_get_enter_fields(struct ftrace_event_call *call) return &entry->enter_fields; } -static struct list_head * -syscall_get_exit_fields(struct ftrace_event_call *call) -{ - return &syscall_exit_fields; -} - struct trace_event_functions enter_syscall_print_funcs = { - .trace = print_syscall_enter, + .trace = print_syscall_enter, }; struct trace_event_functions exit_syscall_print_funcs = { - .trace = print_syscall_exit, + .trace = print_syscall_exit, }; struct ftrace_event_class event_class_syscall_enter = { - .system = "syscalls", - .reg = syscall_enter_register, - .define_fields = syscall_enter_define_fields, - .get_fields = syscall_get_enter_fields, - .raw_init = init_syscall_trace, + .system = "syscalls", + .reg = syscall_enter_register, + .define_fields = syscall_enter_define_fields, + .get_fields = syscall_get_enter_fields, + .raw_init = init_syscall_trace, }; struct ftrace_event_class event_class_syscall_exit = { - .system = "syscalls", - .reg = syscall_exit_register, - .define_fields = syscall_exit_define_fields, - .get_fields = syscall_get_exit_fields, - .raw_init = init_syscall_trace, + .system = "syscalls", + .reg = syscall_exit_register, + .define_fields = syscall_exit_define_fields, + .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), + .raw_init = init_syscall_trace, }; extern unsigned long __start_syscalls_metadata[]; -- cgit v1.2.3 From 490da40d82b31c0562d3f5edb37810f492ca1c34 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 19 Jan 2011 10:51:44 +0800 Subject: blktrace: Don't output messages if NOTIFY isn't set. Now if we enable blktrace, cfq has too many messages output to the trace buffer. It is fine if we don't specify any action mask. But if I do like this: blktrace /dev/sdb -a issue -a complete -o - | blkparse -i - I only want to see 'D' and 'C', while with the following command dd if=/mnt/ocfs2/test of=/dev/null bs=4k count=1 iflag=direct I will get(with a 2.6.37 vanilla kernel): 8,16 0 0 0.000000000 0 m N cfq3805 alloced 8,16 0 0 0.000004126 0 m N cfq3805 insert_request 8,16 0 0 0.000004884 0 m N cfq3805 add_to_rr 8,16 0 0 0.000008417 0 m N cfq workload slice:300 8,16 0 0 0.000009557 0 m N cfq3805 set_active wl_prio:0 wl_type:2 8,16 0 0 0.000010640 0 m N cfq3805 fifo= (null) 8,16 0 0 0.000011193 0 m N cfq3805 dispatch_insert 8,16 0 0 0.000012221 0 m N cfq3805 dispatched a request 8,16 0 0 0.000012802 0 m N cfq3805 activate rq, drv=1 8,16 0 1 0.000013181 3805 D R 114759 + 8 [dd] 8,16 0 2 0.000164244 0 C R 114759 + 8 [0] 8,16 0 0 0.000167997 0 m N cfq3805 complete rqnoidle 0 8,16 0 0 0.000168782 0 m N cfq3805 set_slice=100 8,16 0 0 0.000169874 0 m N cfq3805 arm_idle: 8 group_idle: 0 8,16 0 0 0.000170189 0 m N cfq schedule dispatch 8,16 0 0 0.000397938 0 m N cfq3805 slice expired t=0 8,16 0 0 0.000399763 0 m N cfq3805 sl_used=1 disp=1 charge=1 iops=0 sect=8 8,16 0 0 0.000400227 0 m N cfq3805 del_from_rr 8,16 0 0 0.000400882 0 m N cfq3805 put_queue See, there are 19 lines while I only need 2. I don't think it is appropriate for a user. So this patch will disable any messages if the BLK_TC_NOTIFY isn't set. Now the output for the same command will look like: 8,16 0 1 0.000000000 4908 D R 114759 + 8 [dd] 8,16 0 2 0.000146827 0 C R 114759 + 8 [0] Yes, it is what I want to see. Cc: Steven Rostedt Cc: Jeff Moyer Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 153562d0b93..d95721f3370 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) !blk_tracer_enabled)) return; + /* + * If the BLK_TC_NOTIFY action mask isn't set, don't send any note + * message to the trace. + */ + if (!(bt->act_mask & BLK_TC_NOTIFY)) + return; + local_irq_save(flags); buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); va_start(args, fmt); -- cgit v1.2.3 From 2ce802f62ba32a7d95748ac92bf351f76affb6ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Jan 2011 12:06:35 +0100 Subject: lockdep: Move early boot local IRQ enable/disable status to init/main.c During early boot, local IRQ is disabled until IRQ subsystem is properly initialized. During this time, no one should enable local IRQ and some operations which usually are not allowed with IRQ disabled, e.g. operations which might sleep or require communications with other processors, are allowed. lockdep tracked this with early_boot_irqs_off/on() callbacks. As other subsystems need this information too, move it to init/main.c and make it generally available. While at it, toggle the boolean to early_boot_irqs_disabled instead of enabled so that it can be initialized with %false and %true indicates the exceptional condition. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Acked-by: Pekka Enberg Cc: Linus Torvalds LKML-Reference: <20110120110635.GB6036@htj.dyndns.org> Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 2 +- include/linux/kernel.h | 2 ++ include/linux/lockdep.h | 8 -------- init/main.c | 13 +++++++++++-- kernel/lockdep.c | 18 +----------------- kernel/trace/trace_irqsoff.c | 8 -------- 6 files changed, 15 insertions(+), 36 deletions(-) (limited to 'kernel/trace') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7e8d3bc80af..50542efe45f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1194,7 +1194,7 @@ asmlinkage void __init xen_start_kernel(void) per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; local_irq_disable(); - early_boot_irqs_off(); + early_boot_irqs_disabled = true; memblock_init(); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5a9d9059520..d07d8057e44 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -243,6 +243,8 @@ extern int test_taint(unsigned flag); extern unsigned long get_taint(void); extern int root_mountflags; +extern bool early_boot_irqs_disabled; + /* Values used for system_state */ extern enum system_states { SYSTEM_BOOTING, diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 71c09b26c75..f638fd78d10 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -436,16 +436,8 @@ do { \ #endif /* CONFIG_LOCKDEP */ #ifdef CONFIG_TRACE_IRQFLAGS -extern void early_boot_irqs_off(void); -extern void early_boot_irqs_on(void); extern void print_irqtrace_events(struct task_struct *curr); #else -static inline void early_boot_irqs_off(void) -{ -} -static inline void early_boot_irqs_on(void) -{ -} static inline void print_irqtrace_events(struct task_struct *curr) { } diff --git a/init/main.c b/init/main.c index 00799c1d462..33c37c379e9 100644 --- a/init/main.c +++ b/init/main.c @@ -96,6 +96,15 @@ static inline void mark_rodata_ro(void) { } extern void tc_init(void); #endif +/* + * Debug helper: via this flag we know that we are in 'early bootup code' + * where only the boot processor is running with IRQ disabled. This means + * two things - IRQ must not be enabled before the flag is cleared and some + * operations which are not allowed with IRQ disabled are allowed while the + * flag is set. + */ +bool early_boot_irqs_disabled __read_mostly; + enum system_states system_state __read_mostly; EXPORT_SYMBOL(system_state); @@ -554,7 +563,7 @@ asmlinkage void __init start_kernel(void) cgroup_init_early(); local_irq_disable(); - early_boot_irqs_off(); + early_boot_irqs_disabled = true; /* * Interrupts are still disabled. Do necessary setups, then @@ -621,7 +630,7 @@ asmlinkage void __init start_kernel(void) if (!irqs_disabled()) printk(KERN_CRIT "start_kernel(): bug: interrupts were " "enabled early\n"); - early_boot_irqs_on(); + early_boot_irqs_disabled = false; local_irq_enable(); /* Interrupts are enabled now so all GFP allocations are safe. */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 42ba65dff7d..0d2058da80f 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2291,22 +2291,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) return 1; } -/* - * Debugging helper: via this flag we know that we are in - * 'early bootup code', and will warn about any invalid irqs-on event: - */ -static int early_boot_irqs_enabled; - -void early_boot_irqs_off(void) -{ - early_boot_irqs_enabled = 0; -} - -void early_boot_irqs_on(void) -{ - early_boot_irqs_enabled = 1; -} - /* * Hardirqs will be enabled: */ @@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip) if (unlikely(!debug_locks || current->lockdep_recursion)) return; - if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) + if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) return; if (unlikely(curr->hardirqs_enabled)) { diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5cf8c602b88..92b6e1e12d9 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) * Stubs: */ -void early_boot_irqs_off(void) -{ -} - -void early_boot_irqs_on(void) -{ -} - void trace_softirqs_on(unsigned long ip) { } -- cgit v1.2.3 From e4a9ea5ee7c8812a7bf0c3fb725ceeaa3d4c2fcc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 09:15:30 -0500 Subject: tracing: Replace trace_event struct array with pointer array Currently the trace_event structures are placed in the _ftrace_events section, and at link time, the linker makes one large array of all the trace_event structures. On boot up, this array is read (much like the initcall sections) and the events are processed. The problem is that there is no guarantee that gcc will place complex structures nicely together in an array format. Two structures in the same file may be placed awkwardly, because gcc has no clue that they are suppose to be in an array. A hack was used previous to force the alignment to 4, to pack the structures together. But this caused alignment issues with other architectures (sparc). Instead of packing the structures into an array, the structures' addresses are now put into the _ftrace_event section. As pointers are always the natural alignment, gcc should always pack them tightly together (otherwise initcall, extable, etc would also fail). By having the pointers to the structures in the section, we can still iterate the trace_events without causing unnecessary alignment problems with other architectures, or depending on the current behaviour of gcc that will likely change in the future just to tick us kernel developers off a little more. The _ftrace_event section is also moved into the .init.data section as it is now only needed at boot up. Suggested-by: David Miller Cc: Mathieu Desnoyers Acked-by: David S. Miller Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 7 +++---- include/linux/module.h | 2 +- include/linux/syscalls.h | 10 ++++++---- include/trace/ftrace.h | 24 +++++++++++++----------- kernel/trace/trace_events.c | 12 ++++++------ kernel/trace/trace_export.c | 6 +++--- 6 files changed, 32 insertions(+), 29 deletions(-) (limited to 'kernel/trace') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 6ebb81030d2..f53708be95e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -124,7 +124,8 @@ #endif #ifdef CONFIG_EVENT_TRACING -#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ +#define FTRACE_EVENTS() . = ALIGN(8); \ + VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; #else @@ -179,9 +180,6 @@ TRACE_PRINTKS() \ \ STRUCT_ALIGN(); \ - FTRACE_EVENTS() \ - \ - STRUCT_ALIGN(); \ TRACE_SYSCALLS() /* @@ -482,6 +480,7 @@ KERNEL_CTORS() \ *(.init.rodata) \ MCOUNT_REC() \ + FTRACE_EVENTS() \ DEV_DISCARD(init.rodata) \ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) \ diff --git a/include/linux/module.h b/include/linux/module.h index e7c6385c668..7695a303bb5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -389,7 +389,7 @@ struct module unsigned int num_trace_bprintk_fmt; #endif #ifdef CONFIG_EVENT_TRACING - struct ftrace_event_call *trace_events; + struct ftrace_event_call **trace_events; unsigned int num_trace_events; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 18cd0684fc4..45508fec366 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -128,28 +128,30 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_events"))) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ }; \ + static struct ftrace_event_call __used \ + __attribute__((section("_ftrace_events"))) \ + *__event_enter_##sname = &event_enter_##sname; \ __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_events"))) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ }; \ + static struct ftrace_event_call __used \ + __attribute__((section("_ftrace_events"))) \ + *__event_exit_##sname = &event_exit_##sname; \ __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_METADATA(sname, nb) \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index e16610c208c..3e68366d485 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -446,14 +446,16 @@ static inline notrace int ftrace_get_offsets_##call( \ * .reg = ftrace_event_reg, * }; * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { + * static struct ftrace_event_call event_ = { * .name = "", * .class = event_class_