From 1a960b402a51d80abf54e3f8e4972374ffe5f22d Mon Sep 17 00:00:00 2001 From: Jason Yeh Date: Wed, 23 Jul 2008 23:05:53 +0200 Subject: Oprofile Multiplexing Patch This patch introduces multiplexing support for the Oprofile kernel module. It basically adds a new function pointer in oprofile_operations allowing each architecture to supply its callback to switch between different sets of events when the timer expires. Userspace tools can modify the time slice through /dev/oprofile/time_slice. It also modifies the number of counters exposed to the userspace through /dev/oprofile. For example, the number of counters for AMD CPUs is changed to 32 and multiplexed in sets of 4. Signed-off-by: Jason Yeh Signed-off-by: Robert Richter Cc: oprofile-list Signed-off-by: Ingo Molnar --- arch/x86/oprofile/op_model_ppro.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/oprofile/op_model_ppro.c') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index eff431f6c57..e5811aa480e 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -183,6 +183,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_hardware_counters = NUM_COUNTERS, + .num_hardware_controls = NUM_CONTROLS, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, -- cgit v1.2.3 From 4c168eaf7ea39f25a45a3d8c7eebc3fedb633a1d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 24 Sep 2008 11:08:52 +0200 Subject: Revert "Oprofile Multiplexing Patch" Reverting commit 1a960b402a51d80abf54e3f8e4972374ffe5f22d for the main branch. Multiplexing will be tracked on a separate feature branch. 
Conflicts: arch/x86/oprofile/nmi_int.c --- arch/x86/oprofile/nmi_int.c | 100 +++----------------------------------- arch/x86/oprofile/op_counter.h | 3 +- arch/x86/oprofile/op_model_amd.c | 76 ++++++++++++----------------- arch/x86/oprofile/op_model_p4.c | 4 -- arch/x86/oprofile/op_model_ppro.c | 2 - arch/x86/oprofile/op_x86_model.h | 3 -- drivers/oprofile/oprof.c | 58 ++-------------------- drivers/oprofile/oprof.h | 4 +- drivers/oprofile/oprofile_files.c | 39 +-------------- include/linux/oprofile.h | 3 -- 10 files changed, 45 insertions(+), 247 deletions(-) (limited to 'arch/x86/oprofile/op_model_ppro.c') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 4108d02c529..287513a0981 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -23,18 +23,12 @@ #include "op_counter.h" #include "op_x86_model.h" -DEFINE_PER_CPU(int, switch_index); - static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); static int nmi_start(void); static void nmi_stop(void); -static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs); -static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs); -static void nmi_cpu_stop(void *dummy); -static void nmi_cpu_start(void *dummy); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; @@ -87,47 +81,6 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ -static void nmi_cpu_switch(void *dummy) -{ - int cpu = smp_processor_id(); - int si = per_cpu(switch_index, cpu); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - - nmi_cpu_stop(NULL); - nmi_cpu_save_mpx_registers(msrs); - - /* move to next set */ - si += model->num_hardware_counters; - if ((si > model->num_counters) || (counter_config[si].count == 0)) - per_cpu(switch_index, smp_processor_id()) = 0; - else - per_cpu(switch_index, smp_processor_id()) = si; - - 
nmi_cpu_restore_mpx_registers(msrs); - model->setup_ctrs(msrs); - nmi_cpu_start(NULL); -} - -/* - * Quick check to see if multiplexing is necessary. - * The check should be sufficient since counters are used - * in ordre. - */ -static int nmi_multiplex_on(void) -{ - return counter_config[model->num_hardware_counters].count ? 0 : -EINVAL; -} - -static int nmi_switch_event(void) -{ - if (nmi_multiplex_on() < 0) - return -EINVAL; - - on_each_cpu(nmi_cpu_switch, NULL, 1); - - return 0; -} - static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -191,10 +144,11 @@ static void free_msrs(void) static int allocate_msrs(void) { - int i, success = 1; + int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; + int i; for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, GFP_KERNEL); @@ -202,8 +156,8 @@ static int allocate_msrs(void) success = 0; break; } - per_cpu(cpu_msrs, i).controls = - kmalloc(controls_size, GFP_KERNEL); + per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, + GFP_KERNEL); if (!per_cpu(cpu_msrs, i).controls) { success = 0; break; @@ -247,8 +201,7 @@ static int nmi_setup(void) return err; } - /* - * We need to serialize save and setup for HT because the subset + /* We need to serialize save and setup for HT because the subset * of msrs are distinct for save and setup operations */ @@ -264,6 +217,7 @@ static int nmi_setup(void) per_cpu(cpu_msrs, 0).controls, sizeof(struct op_msr) * model->num_controls); } + } on_each_cpu(nmi_save_registers, NULL, 1); on_each_cpu(nmi_cpu_setup, NULL, 1); @@ -271,41 +225,7 @@ static int nmi_setup(void) return 0; } -static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) -{ - unsigned int si = __get_cpu_var(switch_index); - unsigned int const nr_ctrs = model->num_hardware_counters; - struct op_msr *counters = &msrs->counters[si]; - unsigned int i; - - 
for (i = 0; i < nr_ctrs; ++i) { - int offset = i + si; - if (counters[offset].addr) { - rdmsr(counters[offset].addr, - counters[offset].multiplex.low, - counters[offset].multiplex.high); - } - } -} - -static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) -{ - unsigned int si = __get_cpu_var(switch_index); - unsigned int const nr_ctrs = model->num_hardware_counters; - struct op_msr *counters = &msrs->counters[si]; - unsigned int i; - - for (i = 0; i < nr_ctrs; ++i) { - int offset = i + si; - if (counters[offset].addr) { - wrmsr(counters[offset].addr, - counters[offset].multiplex.low, - counters[offset].multiplex.high); - } - } -} - -static void nmi_cpu_restore_registers(struct op_msrs *msrs) +static void nmi_restore_registers(struct op_msrs *msrs) { unsigned int const nr_ctrs = model->num_counters; unsigned int const nr_ctrls = model->num_controls; @@ -345,8 +265,7 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); - nmi_cpu_restore_registers(msrs); - __get_cpu_var(switch_index) = 0; + nmi_restore_registers(msrs); } static void nmi_shutdown(void) @@ -409,7 +328,6 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); - counter_config[i].save_count_low = 0; } return 0; @@ -551,14 +469,12 @@ int __init op_nmi_init(struct oprofile_operations *ops) } /* default values, can be overwritten by model */ - __raw_get_cpu_var(switch_index) = 0; ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; - ops->switch_events = nmi_switch_event; if (model->init) ret = model->init(ops); diff --git 
a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 786d6e01cf7..2880b15c467 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,14 +10,13 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 32 +#define OP_MAX_COUNTER 8 /* Per-perfctr configuration as set via * oprofilefs. */ struct op_counter_config { unsigned long count; - unsigned long save_count_low; unsigned long enabled; unsigned long event; unsigned long kernel; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index bbf2b68bcc5..d9faf607b3a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -24,10 +23,8 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 32 -#define NUM_HARDWARE_COUNTERS 4 -#define NUM_CONTROLS 32 -#define NUM_HARDWARE_CONTROLS 4 +#define NUM_COUNTERS 4 +#define NUM_CONTROLS 4 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 
1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) @@ -51,7 +48,6 @@ #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) static unsigned long reset_value[NUM_COUNTERS]; -DECLARE_PER_CPU(int, switch_index); #ifdef CONFIG_OPROFILE_IBS @@ -134,17 +130,15 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) int i; for (i = 0; i < NUM_COUNTERS; i++) { - int hw_counter = i % NUM_HARDWARE_COUNTERS; - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + hw_counter)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + hw_counter; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { - int hw_control = i % NUM_HARDWARE_CONTROLS; - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + hw_control)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + hw_control; + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; else msrs->controls[i].addr = 0; } @@ -156,16 +150,8 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; - for (i = 0; i < NUM_HARDWARE_CONTROLS; ++i) { - int offset = i + __get_cpu_var(switch_index); - if (counter_config[offset].enabled) - reset_value[offset] = counter_config[offset].count; - else - reset_value[offset] = 0; - } - /* clear all counters */ - for (i = 0 ; i < NUM_HARDWARE_CONTROLS; ++i) { + for (i = 0 ; i < NUM_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -175,31 +161,34 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ - for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { - int offset = i + 
__get_cpu_var(switch_index); - if ((counter_config[offset].enabled) && (CTR_IS_RESERVED(msrs, i))) { - CTR_WRITE(counter_config[offset].count, msrs, i); + for (i = 0; i < NUM_COUNTERS; ++i) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); CTRL_READ(low, high, msrs, i); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[offset].user); - CTRL_SET_KERN(low, counter_config[offset].kernel); - CTRL_SET_UM(low, counter_config[offset].unit_mask); - CTRL_SET_EVENT_LOW(low, counter_config[offset].event); - CTRL_SET_EVENT_HIGH(high, counter_config[offset].event); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT_LOW(low, counter_config[i].event); + CTRL_SET_EVENT_HIGH(high, counter_config[i].event); CTRL_SET_HOST_ONLY(high, 0); CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; } } } @@ -287,14 +276,13 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, unsigned int low, high; int i; - for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { - int offset = i + __get_cpu_var(switch_index); - if (!reset_value[offset]) + for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, offset); - CTR_WRITE(reset_value[offset], msrs, i); + oprofile_add_sample(regs, i); + CTR_WRITE(reset_value[i], msrs, i); } } @@ -310,10 +298,8 @@ static void op_amd_start(struct op_msrs const * const msrs) { unsigned int low, high; int i; - - for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { - int offset = i + __get_cpu_var(switch_index); - if (reset_value[offset]) { + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, 
high, msrs, i); @@ -343,8 +329,8 @@ static void op_amd_stop(struct op_msrs const * const msrs) /* Subtle: stop on all counters to avoid race with * setting our pm callback */ - for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { - if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (!reset_value[i]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); @@ -370,11 +356,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_CONTROLS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -548,8 +534,6 @@ struct op_x86_model_spec const op_amd_spec = { .exit = op_amd_exit, .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, - .num_hardware_counters = NUM_HARDWARE_COUNTERS, - .num_hardware_controls = NUM_HARDWARE_CONTROLS, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, .check_ctrs = &op_amd_check_ctrs, diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index cacba61ffba..43ac5af338d 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -700,8 +700,6 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, - .num_hardware_counters = NUM_COUNTERS_HT2, - .num_hardware_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -714,8 +712,6 @@ struct op_x86_model_spec const op_p4_ht2_spec = { struct op_x86_model_spec const op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, - 
.num_hardware_counters = NUM_COUNTERS_NON_HT, - .num_hardware_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index e5811aa480e..eff431f6c57 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -183,8 +183,6 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, - .num_hardware_counters = NUM_COUNTERS, - .num_hardware_controls = NUM_CONTROLS, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index e07ba107637..05a0261ba0c 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -19,7 +19,6 @@ struct op_saved_msr { struct op_msr { unsigned long addr; struct op_saved_msr saved; - struct op_saved_msr multiplex; }; struct op_msrs { @@ -35,8 +34,6 @@ struct pt_regs; struct op_x86_model_spec { int (*init)(struct oprofile_operations *ops); void (*exit)(void); - unsigned int const num_hardware_counters; - unsigned int const num_hardware_controls; unsigned int const num_counters; unsigned int const num_controls; void (*fill_in_addresses)(struct op_msrs * const msrs); diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index b2fa5df64a6..2c645170f06 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include #include #include "oprof.h" @@ -21,18 +19,13 @@ #include "cpu_buffer.h" #include "buffer_sync.h" #include "oprofile_stats.h" - -static unsigned long is_setup; -static void switch_worker(struct work_struct *work); -static DECLARE_DELAYED_WORK(switch_work, switch_worker); -static 
DEFINE_MUTEX(start_mutex); struct oprofile_operations oprofile_ops; -unsigned long timeout_jiffies; unsigned long oprofile_started; unsigned long backtrace_depth; -/* Multiplexing defaults at 1 msec*/ +static unsigned long is_setup; +static DEFINE_MUTEX(start_mutex); /* timer 0 - use performance monitoring hardware if available @@ -94,16 +87,6 @@ out: return err; } -static void start_switch_worker(void) -{ - schedule_delayed_work(&switch_work, timeout_jiffies); -} - -static void switch_worker(struct work_struct *work) -{ - if (!oprofile_ops.switch_events()) - start_switch_worker(); -} /* Actually start profiling (echo 1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -111,6 +94,7 @@ int oprofile_start(void) int err = -EINVAL; mutex_lock(&start_mutex); + if (!is_setup) goto out; @@ -124,9 +108,6 @@ int oprofile_start(void) if ((err = oprofile_ops.start())) goto out; - if (oprofile_ops.switch_events) - start_switch_worker(); - oprofile_started = 1; out: mutex_unlock(&start_mutex); @@ -142,7 +123,6 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; - cancel_delayed_work_sync(&switch_work); /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -175,32 +155,6 @@ post_sync: mutex_unlock(&start_mutex); } -/* User inputs in ms, converts to jiffies */ -int oprofile_set_timeout(unsigned long val_msec) -{ - int err = 0; - - mutex_lock(&start_mutex); - - if (oprofile_started) { - err = -EBUSY; - goto out; - } - - if (!oprofile_ops.switch_events) { - err = -EINVAL; - goto out; - } - - timeout_jiffies = msecs_to_jiffies(val_msec); - if (timeout_jiffies == MAX_JIFFY_OFFSET) - timeout_jiffies = msecs_to_jiffies(1); - -out: - mutex_unlock(&start_mutex); - return err; - -} int oprofile_set_backtrace(unsigned long val) { @@ -225,16 +179,10 @@ out: return err; } -static void __init oprofile_switch_timer_init(void) -{ - timeout_jiffies = msecs_to_jiffies(1); -} - static int __init oprofile_init(void) { int err; - 
oprofile_switch_timer_init(); err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index c4406a7366b..18323650806 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -27,8 +27,7 @@ extern unsigned long fs_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long backtrace_depth; -extern unsigned long timeout_jiffies; - + struct super_block; struct dentry; @@ -36,6 +35,5 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root); void oprofile_timer_init(struct oprofile_operations * ops); int oprofile_set_backtrace(unsigned long depth); -int oprofile_set_timeout(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index cc4f5a1f8ef..ef953ba5ab6 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -9,7 +9,6 @@ #include #include -#include #include "event_buffer.h" #include "oprofile_stats.h" @@ -19,40 +18,6 @@ unsigned long fs_buffer_size = 131072; unsigned long fs_cpu_buffer_size = 8192; unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ -static ssize_t timeout_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(jiffies_to_msecs(timeout_jiffies), - buf, count, offset); -} - - -static ssize_t timeout_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) - return retval; - - retval = oprofile_set_timeout(val); - - if (retval) - return retval; - return count; -} - -static const struct file_operations timeout_fops = { - .read = timeout_read, - .write = timeout_write, -}; - - static ssize_t depth_read(struct file * file, char __user * buf, size_t 
count, loff_t * offset) { return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); @@ -120,10 +85,11 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t if (*offset) return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); if (retval) return retval; - + if (val) retval = oprofile_start(); else @@ -163,7 +129,6 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); - oprofilefs_create_file(sb, root, "timeout_ms", &timeout_fops); oprofile_create_stats_files(sb, root); if (oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 687f2f4c36a..bcb8f725427 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -67,9 +67,6 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); - - /* Multiplex between different events. Optional. */ - int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; -- cgit v1.2.3 From b99170288421c79f0c2efa8b33e26e65f4bb7fb8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 18 Aug 2008 14:50:31 +0200 Subject: oprofile: Implement Intel architectural perfmon support Newer Intel CPUs (Core1+) have support for architectural events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. The advantage of this is that it can be done without knowing about the specific CPU, because the CPU describes by itself what performance events are supported. This is only a fallback because only a limited set of 6 events are supported. 
This allows profiling on Nehalem and on Atom systems (the latter not tested). This patch implements support for that in oprofile's Intel Family 6 profiling module. It also has the advantage of supporting an arbitrary number of events now as reported by the CPU. Also allow arbitrary counter widths >32bit while we're at it. Requires a patched oprofile userland to support the new architecture. v2: update for latest oprofile tree remove force_arch_perfmon Signed-off-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 23 ++++++--- arch/x86/oprofile/op_model_ppro.c | 104 ++++++++++++++++++++++++++++++-------- arch/x86/oprofile/op_x86_model.h | 3 ++ 3 files changed, 102 insertions(+), 28 deletions(-) (limited to 'arch/x86/oprofile/op_model_ppro.c') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 1059f3fe6b1..12d6f85084f 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -429,6 +429,16 @@ static int __init ppro_init(char **cpu_type) return 1; } +static int __init arch_perfmon_init(char **cpu_type) +{ + if (!cpu_has_arch_perfmon) + return 0; + *cpu_type = "i386/arch_perfmon"; + model = &op_arch_perfmon_spec; + arch_perfmon_setup_counters(); + return 1; +} + /* in order to get sysfs right */ static int using_nmi; @@ -436,7 +446,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) { __u8 vendor = boot_cpu_data.x86_vendor; __u8 family = boot_cpu_data.x86; - char *cpu_type; + char *cpu_type = NULL; int ret = 0; if (!cpu_has_apic) @@ -474,19 +484,20 @@ int __init op_nmi_init(struct oprofile_operations *ops) switch (family) { /* Pentium IV */ case 0xf: - if (!p4_init(&cpu_type)) - return -ENODEV; + p4_init(&cpu_type); break; /* A P6-class processor */ case 6: - if (!ppro_init(&cpu_type)) - return -ENODEV; + ppro_init(&cpu_type); break; default: - return -ENODEV; + break; } + + if (!cpu_type && !arch_perfmon_init(&cpu_type)) + return -ENODEV; break; default: diff --git 
a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index eff431f6c57..12e207a67f1 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -1,32 +1,34 @@ /* * @file op_model_ppro.h - * pentium pro / P6 model-specific MSR operations + * Family 6 perfmon and architectural perfmon MSR operations * * @remark Copyright 2002 OProfile authors + * @remark Copyright 2008 Intel Corporation * @remark Read the file COPYING * * @author John Levon * @author Philippe Elie * @author Graydon Hoare + * @author Andi Kleen */ #include +#include #include #include #include #include +#include #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 2 -#define NUM_CONTROLS 2 +static int num_counters = 2; +static int counter_width = 32; #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_32BIT_WRITE(l, msrs, c) \ - do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 
1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) @@ -40,20 +42,20 @@ #define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_EVENT(val, e) (val |= e) -static unsigned long reset_value[NUM_COUNTERS]; +static u64 *reset_value; static void ppro_fill_in_addresses(struct op_msrs * const msrs) { int i; - for (i = 0; i < NUM_COUNTERS; i++) { + for (i = 0; i < num_counters; i++) { if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; else msrs->counters[i].addr = 0; } - for (i = 0; i < NUM_CONTROLS; i++) { + for (i = 0; i < num_counters; i++) { if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; else @@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) { + reset_value = kmalloc(sizeof(unsigned) * num_counters, + GFP_ATOMIC); + if (!reset_value) + return; + } + + if (cpu_has_arch_perfmon) { + union cpuid10_eax eax; + eax.full = cpuid_eax(0xa); + if (counter_width < eax.split.bit_width) + counter_width = eax.split.bit_width; + } + /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < num_counters; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; - CTR_32BIT_WRITE(1, msrs, i); + wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; - CTR_32BIT_WRITE(counter_config[i].count, msrs, i); + wrmsrl(msrs->counters[i].addr, 
-reset_value[i]); CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); @@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { + for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { oprofile_add_sample(regs, i); - CTR_32BIT_WRITE(reset_value[i], msrs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); } } @@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs) unsigned int low, high; int i; - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); @@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; CTRL_READ(low, high, msrs, i); @@ -169,21 +185,65 @@ static void ppro_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < num_counters ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0 ; i < num_counters ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } + if (reset_value) { + kfree(reset_value); + reset_value = NULL; + } } struct op_x86_model_spec const op_ppro_spec = { - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, + .num_counters = 2, + .num_controls = 2, + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop, + .shutdown = &ppro_shutdown +}; + +/* + * Architectural performance monitoring. + * + * Newer Intel CPUs (Core1+) have support for architectural + * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. 
+ * The advantage of this is that it can be done without knowing about + * the specific CPU. + */ + +void arch_perfmon_setup_counters(void) +{ + union cpuid10_eax eax; + + eax.full = cpuid_eax(0xa); + + /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ + if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && + current_cpu_data.x86_model == 15) { + eax.split.version_id = 2; + eax.split.num_counters = 2; + eax.split.bit_width = 40; + } + + num_counters = eax.split.num_counters; + + op_arch_perfmon_spec.num_counters = num_counters; + op_arch_perfmon_spec.num_controls = num_counters; +} + +struct op_x86_model_spec op_arch_perfmon_spec = { + /* num_counters/num_controls filled in at runtime */ .fill_in_addresses = &ppro_fill_in_addresses, + /* user space does the cpuid check for available events */ .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, .start = &ppro_start, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 3d3b85d3c25..0b601893a4d 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -49,5 +49,8 @@ extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_amd_spec; +extern struct op_x86_model_spec op_arch_perfmon_spec; + +extern void arch_perfmon_setup_counters(void); #endif /* OP_X86_MODEL_H */ -- cgit v1.2.3 From 59512900baab03c5629f2ff5efad1d5d4e682ece Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 29 Sep 2008 22:23:33 +0200 Subject: oprofile: discover counters for op ppro too Discover number of counters for all family 6 models even when not in arch perfmon mode. 
Signed-off-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 8 +++++--- arch/x86/oprofile/op_x86_model.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/oprofile/op_model_ppro.c') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 12e207a67f1..f5a226823e9 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -200,9 +200,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs) } -struct op_x86_model_spec const op_ppro_spec = { - .num_counters = 2, - .num_controls = 2, +struct op_x86_model_spec op_ppro_spec = { + .num_counters = 2, /* can be overriden */ + .num_controls = 2, /* dito */ .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, @@ -238,6 +238,8 @@ void arch_perfmon_setup_counters(void) op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; + op_ppro_spec.num_counters = num_counters; + op_ppro_spec.num_controls = num_counters; } struct op_x86_model_spec op_arch_perfmon_spec = { diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 0b601893a4d..596de7a5559 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -45,7 +45,7 @@ struct op_x86_model_spec { void (*shutdown)(struct op_msrs const * const msrs); }; -extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_amd_spec; -- cgit v1.2.3 From c92960fccb9f32a1d6110f6dcfe483ed96c62beb Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 5 Sep 2008 17:12:36 +0200 Subject: oprofile: whitespace fixes Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 20 ++++++++++---------- 
arch/x86/oprofile/op_model_p4.c | 32 ++++++++++++++++---------------- arch/x86/oprofile/op_model_ppro.c | 16 ++++++++-------- drivers/oprofile/buffer_sync.c | 1 - drivers/oprofile/oprof.c | 24 ++++++++++++------------ 5 files changed, 46 insertions(+), 47 deletions(-) (limited to 'arch/x86/oprofile/op_model_ppro.c') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 23ce63f2762..b9a810b3326 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -530,14 +530,14 @@ static void op_amd_exit(void) #endif struct op_x86_model_spec const op_amd_spec = { - .init = op_amd_init, - .exit = op_amd_exit, - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, - .fill_in_addresses = &op_amd_fill_in_addresses, - .setup_ctrs = &op_amd_setup_ctrs, - .check_ctrs = &op_amd_check_ctrs, - .start = &op_amd_start, - .stop = &op_amd_stop, - .shutdown = &op_amd_shutdown + .init = op_amd_init, + .exit = op_amd_exit, + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &op_amd_fill_in_addresses, + .setup_ctrs = &op_amd_setup_ctrs, + .check_ctrs = &op_amd_check_ctrs, + .start = &op_amd_start, + .stop = &op_amd_stop, + .shutdown = &op_amd_shutdown }; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 43ac5af338d..4c4a51c90bc 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,24 +698,24 @@ static void p4_shutdown(struct op_msrs const * const msrs) #ifdef CONFIG_SMP struct op_x86_model_spec const op_p4_ht2_spec = { - .num_counters = NUM_COUNTERS_HT2, - .num_controls = NUM_CONTROLS_HT2, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown + .num_counters = NUM_COUNTERS_HT2, + .num_controls = NUM_CONTROLS_HT2, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + 
.check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop, + .shutdown = &p4_shutdown }; #endif struct op_x86_model_spec const op_p4_spec = { - .num_counters = NUM_COUNTERS_NON_HT, - .num_controls = NUM_CONTROLS_NON_HT, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown + .num_counters = NUM_COUNTERS_NON_HT, + .num_controls = NUM_CONTROLS_NON_HT, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop, + .shutdown = &p4_shutdown }; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index eff431f6c57..c665bac4a14 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -181,12 +181,12 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, - .fill_in_addresses = &ppro_fill_in_addresses, - .setup_ctrs = &ppro_setup_ctrs, - .check_ctrs = &ppro_check_ctrs, - .start = &ppro_start, - .stop = &ppro_stop, - .shutdown = &ppro_shutdown + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop, + .shutdown = &ppro_shutdown }; diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index ed982273fb8..564577307a5 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -41,7 +41,6 @@ static cpumask_t marked_cpus = CPU_MASK_NONE; static DEFINE_SPINLOCK(task_mortuary); static void process_task_mortuary(void); - /* Take ownership of the task struct and place it on the * list for processing. 
Only after two full buffer syncs * does the task eventually get freed, because by then diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 2c645170f06..50062cea292 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -19,7 +19,7 @@ #include "cpu_buffer.h" #include "buffer_sync.h" #include "oprofile_stats.h" - + struct oprofile_operations oprofile_ops; unsigned long oprofile_started; @@ -36,7 +36,7 @@ static int timer = 0; int oprofile_setup(void) { int err; - + mutex_lock(&start_mutex); if ((err = alloc_cpu_buffers())) @@ -44,10 +44,10 @@ int oprofile_setup(void) if ((err = alloc_event_buffer())) goto out1; - + if (oprofile_ops.setup && (err = oprofile_ops.setup())) goto out2; - + /* Note even though this starts part of the * profiling overhead, it's necessary to prevent * us missing task deaths and eventually oopsing @@ -74,7 +74,7 @@ post_sync: is_setup = 1; mutex_unlock(&start_mutex); return 0; - + out3: if (oprofile_ops.shutdown) oprofile_ops.shutdown(); @@ -92,17 +92,17 @@ out: int oprofile_start(void) { int err = -EINVAL; - + mutex_lock(&start_mutex); if (!is_setup) goto out; - err = 0; - + err = 0; + if (oprofile_started) goto out; - + oprofile_reset_stats(); if ((err = oprofile_ops.start())) @@ -114,7 +114,7 @@ out: return err; } - + /* echo 0>/dev/oprofile/enable */ void oprofile_stop(void) { @@ -204,13 +204,13 @@ static void __exit oprofile_exit(void) oprofile_arch_exit(); } - + module_init(oprofile_init); module_exit(oprofile_exit); module_param_named(timer, timer, int, 0644); MODULE_PARM_DESC(timer, "force use of timer interrupt"); - + MODULE_LICENSE("GPL"); MODULE_AUTHOR("John Levon "); MODULE_DESCRIPTION("OProfile system profiler"); -- cgit v1.2.3