summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2009-03-23 18:22:08 +0100
committerIngo Molnar <mingo@elte.hu>2009-04-06 09:30:26 +0200
commit37d81828385f8ff823caaaf1a83e72d065b6cfa1 (patch)
tree972900a193a6a5ab1bdc14adcd7ab72bf0a51c13 /kernel
parent96f6d4444302bb2ea2cf409529eef816462f6ce0 (diff)
perf_counter: add an mmap method to allow userspace to read hardware counters
Impact: new feature giving performance improvement

This adds the ability for userspace to do an mmap on a hardware counter fd and get access to a read-only page that contains the information needed to translate a hardware counter value to the full 64-bit counter value that would be returned by a read on the fd. This is useful on architectures that allow user programs to read the hardware counters, such as PowerPC.

The mmap will only succeed if the counter is a hardware counter monitoring the current process.

On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter and translate it to the full 64-bit value in about 30ns using the mmapped page, compared to about 830ns for the read syscall on the counter, so this does give a significant performance improvement.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.297057964@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/perf_counter.c76
1 files changed, 76 insertions, 0 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ce34bff07bd..d9cfd902140 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file)
mutex_unlock(&counter->mutex);
mutex_unlock(&ctx->mutex);
+ free_page(counter->user_page);
free_counter(counter);
put_context(ctx);
@@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return err;
}
+void perf_counter_update_userpage(struct perf_counter *counter)
+{
+ struct perf_counter_mmap_page *userpg;
+ /*
+ * Refresh the fields of the counter's mmap'ed user page, if it has one.
+ * Nothing to do when no page has been allocated (user_page is 0).
+ */
+ if (!counter->user_page)
+ return;
+ userpg = (struct perf_counter_mmap_page *) counter->user_page;
+ /*
+ * 'lock' is incremented once before and once after the field updates,
+ * with a write barrier on each side of the updates. NOTE(review):
+ * presumably a reader detects a concurrent update by seeing 'lock'
+ * change or hold an odd value - confirm against the reader-side
+ * protocol documented for struct perf_counter_mmap_page.
+ */
+ ++userpg->lock;
+ smp_wmb();
+ userpg->index = counter->hw.idx;
+ userpg->offset = atomic64_read(&counter->count);
+ if (counter->state == PERF_COUNTER_STATE_ACTIVE) /* prev_count is subtracted only while the counter is active */
+ userpg->offset -= atomic64_read(&counter->hw.prev_count);
+ smp_wmb();
+ ++userpg->lock;
+}
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct perf_counter *counter = vma->vm_file->private_data;
+ /*
+ * Page-fault handler for mappings set up by perf_mmap(): the counter is
+ * found through the mapped file's private_data. Without an allocated
+ * user page there is nothing to map, so the fault fails with SIGBUS.
+ */
+ if (!counter->user_page)
+ return VM_FAULT_SIGBUS;
+ /*
+ * Hand back the struct page backing user_page, taking a reference on
+ * it first; every fault resolves to this same single page.
+ */
+ vmf->page = virt_to_page(counter->user_page);
+ get_page(vmf->page);
+ return 0;
+}
+
+static struct vm_operations_struct perf_mmap_vmops = {
+ .fault = perf_mmap_fault, /* only a fault handler is provided; installed on the vma by perf_mmap() */
+};
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct perf_counter *counter = file->private_data;
+ unsigned long userpg;
+ /*
+ * mmap handler for a counter fd. Only a shared, non-writable mapping
+ * of exactly one page is accepted; anything else gets -EINVAL.
+ */
+ if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+ return -EINVAL;
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ /*
+ * For now, restrict to the case of a hardware counter
+ * on the current task.
+ */
+ if (is_software_counter(counter) || counter->task != current)
+ return -EINVAL;
+ /*
+ * The user page is allocated on the first mmap of this counter. The
+ * allocation happens before counter->mutex is taken; under the mutex,
+ * if a page has been installed in the meantime the fresh one is freed
+ * and the installed one is used instead. If the allocation failed and
+ * no page was installed, userpg is still 0 and -ENOMEM is returned.
+ */
+ userpg = counter->user_page;
+ if (!userpg) {
+ userpg = get_zeroed_page(GFP_KERNEL);
+ mutex_lock(&counter->mutex);
+ if (counter->user_page) {
+ free_page(userpg);
+ userpg = counter->user_page;
+ } else {
+ counter->user_page = userpg;
+ }
+ mutex_unlock(&counter->mutex);
+ if (!userpg)
+ return -ENOMEM;
+ }
+ /* Fill in the page contents before the mapping is handed back. */
+ perf_counter_update_userpage(counter);
+ /*
+ * VM_MAYWRITE is cleared and VM_RESERVED set before installing the
+ * fault handler. NOTE(review): clearing VM_MAYWRITE presumably stops
+ * a later mprotect() from making the page writable - confirm.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ vma->vm_flags |= VM_RESERVED;
+ vma->vm_ops = &perf_mmap_vmops;
+ return 0;
+}
+
static const struct file_operations perf_fops = {
.release = perf_release,
.read = perf_read,
.poll = perf_poll,
.unlocked_ioctl = perf_ioctl,
.compat_ioctl = perf_ioctl,
+ .mmap = perf_mmap, /* new in this patch: exposes the read-only counter page to userspace */
};
/*