diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/backtracetest.c | 48 | ||||
-rw-r--r-- | kernel/irq/manage.c | 3 | ||||
-rw-r--r-- | kernel/irq/proc.c | 21 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 5 | ||||
-rw-r--r-- | kernel/kprobes.c | 2 | ||||
-rw-r--r-- | kernel/module.c | 8 | ||||
-rw-r--r-- | kernel/panic.c | 29 | ||||
-rw-r--r-- | kernel/printk.c | 7 | ||||
-rw-r--r-- | kernel/ptrace.c | 165 | ||||
-rw-r--r-- | kernel/sched.c | 16 | ||||
-rw-r--r-- | kernel/signal.c | 4 | ||||
-rw-r--r-- | kernel/softirq.c | 11 | ||||
-rw-r--r-- | kernel/spinlock.c | 3 | ||||
-rw-r--r-- | kernel/sysctl.c | 9 | ||||
-rw-r--r-- | kernel/test_kprobes.c | 216 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 13 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 31 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 7 | ||||
-rw-r--r-- | kernel/time/tick-internal.h | 2 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 76 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 28 | ||||
-rw-r--r-- | kernel/time/timer_stats.c | 2 | ||||
-rw-r--r-- | kernel/timer.c | 82 |
24 files changed, 670 insertions, 120 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 390d4214626..8885627ea02 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o +obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o @@ -43,6 +44,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o +obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_AUDIT_TREE) += audit_tree.o diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c new file mode 100644 index 00000000000..d1a7605c5b8 --- /dev/null +++ b/kernel/backtracetest.c @@ -0,0 +1,48 @@ +/* + * Simple stack backtrace regression test module + * + * (C) Copyright 2008 Intel Corporation + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> + +static struct timer_list backtrace_timer; + +static void backtrace_test_timer(unsigned long data) +{ + printk("Testing a backtrace from irq context.\n"); + printk("The following trace is a kernel self test and not a bug!\n"); + dump_stack(); +} +static int backtrace_regression_test(void) +{ + printk("====[ backtrace testing ]===========\n"); + printk("Testing a backtrace from process context.\n"); + printk("The following trace is a kernel self test and not a bug!\n"); + dump_stack(); + + init_timer(&backtrace_timer); + backtrace_timer.function = backtrace_test_timer; + mod_timer(&backtrace_timer, jiffies + 10); + + msleep(10); + printk("====[ end of backtrace testing ]====\n"); + return 0; +} + +static void exitf(void) +{ +} + +module_init(backtrace_regression_test); +module_exit(exitf); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1f314221d53..438a0146428 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -479,6 +479,9 @@ void free_irq(unsigned int irq, void *dev_id) return; } printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq); +#ifdef CONFIG_DEBUG_SHIRQ + dump_stack(); +#endif spin_unlock_irqrestore(&desc->lock, flags); return; } diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 50b81b98046..c2f2ccb0549 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -75,6 +75,18 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, #endif +static int irq_spurious_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct irq_desc *d = &irq_desc[(long) data]; + return sprintf(page, "count %u\n" + "unhandled %u\n" + "last_unhandled %u ms\n", + d->irq_count, + d->irqs_unhandled, + jiffies_to_msecs(d->last_unhandled)); +} + #define MAX_NAMELEN 128 static int name_unique(unsigned int irq, struct irqaction *new_action) @@ -118,6 +130,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq) { char name [MAX_NAMELEN]; + struct proc_dir_entry *entry; if (!root_irq_dir || (irq_desc[irq].chip == &no_irq_chip) || @@ -132,8 +145,6 @@ void register_irq_proc(unsigned int irq) #ifdef CONFIG_SMP { - struct proc_dir_entry *entry; - /* create /proc/irq/<irq>/smp_affinity */ entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir); @@ -144,6 +155,12 @@ void register_irq_proc(unsigned int irq) } } #endif + + entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir); + if (entry) { + entry->data = (void *)(long)irq; + entry->read_proc = irq_spurious_read; + } } #undef MAX_NAMELEN diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 32b161972fa..a6b2bc831dd 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/interrupt.h> +#include <linux/moduleparam.h> static int irqfixup __read_mostly; @@ -225,6 +226,8 @@ int noirqdebug_setup(char *str) } __setup("noirqdebug", noirqdebug_setup); +module_param(noirqdebug, bool, 0644); +MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); static int __init irqfixup_setup(char *str) { @@ -236,6 +239,8 @@ static int __init irqfixup_setup(char *str) } __setup("irqfixup", irqfixup_setup); +module_param(irqfixup, int, 0644); +MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode"); static int __init irqpoll_setup(char *str) { diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e3a5d817ac9..d0493eafea3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -824,6 +824,8 @@ static int __init init_kprobes(void) if (!err) err = register_die_notifier(&kprobe_exceptions_nb); + if (!err) + init_test_probes(); return err; } diff --git a/kernel/module.c b/kernel/module.c index f6a4e721fd4..bd60278ee70 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -430,6 +430,14 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } +static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +{ + int cpu; + + for_each_possible_cpu(cpu) + memcpy(pcpudest + per_cpu_offset(cpu), from, size); +} + static int percpu_modinit(void) { pcpu_num_used = 2; diff --git a/kernel/panic.c b/kernel/panic.c index da4d6bac270..d9e90cfe329 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -20,6 +20,7 @@ #include <linux/kexec.h> #include <linux/debug_locks.h> #include <linux/random.h> +#include <linux/kallsyms.h> int panic_on_oops; int tainted; @@ -280,6 +281,13 @@ static int init_oops_id(void) } late_initcall(init_oops_id); +static void print_oops_end_marker(void) +{ + init_oops_id(); + printk(KERN_WARNING "---[ end trace %016llx ]---\n", + (unsigned long long)oops_id); +} + /* * Called when the architecture exits its oops handler, after printing * everything. @@ -287,11 +295,26 @@ late_initcall(init_oops_id); void oops_exit(void) { do_oops_enter_exit(); - init_oops_id(); - printk(KERN_WARNING "---[ end trace %016llx ]---\n", - (unsigned long long)oops_id); + print_oops_end_marker(); } +#ifdef WANT_WARN_ON_SLOWPATH +void warn_on_slowpath(const char *file, int line) +{ + char function[KSYM_SYMBOL_LEN]; + unsigned long caller = (unsigned long) __builtin_return_address(0); + sprint_symbol(function, caller); + + printk(KERN_WARNING "------------[ cut here ]------------\n"); + printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, + line, function); + print_modules(); + dump_stack(); + print_oops_end_marker(); +} +EXPORT_SYMBOL(warn_on_slowpath); +#endif + #ifdef CONFIG_CC_STACKPROTECTOR /* * Called when gcc's -fstack-protector feature is used, and diff --git a/kernel/printk.c b/kernel/printk.c index 3b7c968d0ef..58bbec68411 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -36,6 +36,13 @@ #include <asm/uaccess.h> +/* + * Architectures can override it: + */ +void __attribute__((weak)) early_printk(const char *fmt, ...) +{ +} + #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) /* printk's without a loglevel use this.. */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index c719bb9d79a..e6e9b8be4b0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -366,12 +366,73 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data) return error; } + +#ifdef PTRACE_SINGLESTEP +#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) +#else +#define is_singlestep(request) 0 +#endif + +#ifdef PTRACE_SINGLEBLOCK +#define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK) +#else +#define is_singleblock(request) 0 +#endif + +#ifdef PTRACE_SYSEMU +#define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP) +#else +#define is_sysemu_singlestep(request) 0 +#endif + +static int ptrace_resume(struct task_struct *child, long request, long data) +{ + if (!valid_signal(data)) + return -EIO; + + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + +#ifdef TIF_SYSCALL_EMU + if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP) + set_tsk_thread_flag(child, TIF_SYSCALL_EMU); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); +#endif + + if (is_singleblock(request)) { + if (unlikely(!arch_has_block_step())) + return -EIO; + user_enable_block_step(child); + } else if (is_singlestep(request) || is_sysemu_singlestep(request)) { + if (unlikely(!arch_has_single_step())) + return -EIO; + user_enable_single_step(child); + } + else + user_disable_single_step(child); + + child->exit_code = data; + wake_up_process(child); + + return 0; +} + int ptrace_request(struct task_struct *child, long request, long addr, long data) { int ret = -EIO; switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + return generic_ptrace_peekdata(child, addr, data); + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + return generic_ptrace_pokedata(child, addr, data); + #ifdef PTRACE_OLDSETOPTIONS case PTRACE_OLDSETOPTIONS: #endif @@ -390,6 +451,26 @@ int ptrace_request(struct task_struct *child, long request, case PTRACE_DETACH: /* detach a process that was attached. */ ret = ptrace_detach(child, data); break; + +#ifdef PTRACE_SINGLESTEP + case PTRACE_SINGLESTEP: +#endif +#ifdef PTRACE_SINGLEBLOCK + case PTRACE_SINGLEBLOCK: +#endif +#ifdef PTRACE_SYSEMU + case PTRACE_SYSEMU: + case PTRACE_SYSEMU_SINGLESTEP: +#endif + case PTRACE_SYSCALL: + case PTRACE_CONT: + return ptrace_resume(child, request, data); + + case PTRACE_KILL: + if (child->exit_state) /* already dead */ + return 0; + return ptrace_resume(child, request, SIGKILL); + default: break; } @@ -526,3 +607,87 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) copied = access_process_vm(tsk, addr, &data, sizeof(data), 1); return (copied == sizeof(data)) ? 0 : -EIO; } + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +int compat_ptrace_request(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data) +{ + compat_ulong_t __user *datap = compat_ptr(data); + compat_ulong_t word; + int ret; + + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + ret = access_process_vm(child, addr, &word, sizeof(word), 0); + if (ret != sizeof(word)) + ret = -EIO; + else + ret = put_user(word, datap); + break; + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + ret = access_process_vm(child, addr, &data, sizeof(data), 1); + ret = (ret != sizeof(data) ? -EIO : 0); + break; + + case PTRACE_GETEVENTMSG: + ret = put_user((compat_ulong_t) child->ptrace_message, datap); + break; + + default: + ret = ptrace_request(child, request, addr, data); + } + + return ret; +} + +#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_long_t addr, compat_long_t data) +{ + struct task_struct *child; + long ret; + + /* + * This lock_kernel fixes a subtle race with suid exec + */ + lock_kernel(); + if (request == PTRACE_TRACEME) { + ret = ptrace_traceme(); + goto out; + } + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + /* + * Some architectures need to do book-keeping after + * a ptrace attach. + */ + if (!ret) + arch_ptrace_attach(child); + goto out_put_task_struct; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (!ret) + ret = compat_arch_ptrace(child, request, addr, data); + + out_put_task_struct: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} +#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ + +#endif /* CONFIG_COMPAT */ diff --git a/kernel/sched.c b/kernel/sched.c index 524285e46fa..ba4c88088f6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4945,19 +4945,15 @@ EXPORT_SYMBOL(_cond_resched); */ int cond_resched_lock(spinlock_t *lock) { + int resched = need_resched() && system_state == SYSTEM_RUNNING; int ret = 0; - if (need_lockbreak(lock)) { + if (spin_needbreak(lock) || resched) { spin_unlock(lock); - cpu_relax(); - ret = 1; - spin_lock(lock); - } - if (need_resched() && system_state == SYSTEM_RUNNING) { - spin_release(&lock->dep_map, 1, _THIS_IP_); - _raw_spin_unlock(lock); - preempt_enable_no_resched(); - __cond_resched(); + if (resched && need_resched()) + __cond_resched(); + else + cpu_relax(); ret = 1; spin_lock(lock); } diff --git a/kernel/signal.c b/kernel/signal.c index afa4f781f92..bf49ce6f016 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -733,13 +733,13 @@ static void print_fatal_signal(struct pt_regs *regs, int signr) current->comm, task_pid_nr(current), signr); #if defined(__i386__) && !defined(__arch_um__) - printk("code at %08lx: ", regs->eip); + printk("code at %08lx: ", regs->ip); { int i; for (i = 0; i < 16; i++) { unsigned char insn; - __get_user(insn, (unsigned char *)(regs->eip + i)); + __get_user(insn, (unsigned char *)(regs->ip + i)); printk("%02x ", insn); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index bd89bc4eb0b..d7837d45419 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -3,7 +3,9 @@ * * Copyright (C) 1992 Linus Torvalds * - * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) + * Distribute under GPLv2. + * + * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) */ #include <linux/module.h> @@ -278,9 +280,14 @@ asmlinkage void do_softirq(void) */ void irq_enter(void) { +#ifdef CONFIG_NO_HZ + int cpu = smp_processor_id(); + if (idle_cpu(cpu) && !in_interrupt()) + tick_nohz_stop_idle(cpu); +#endif __irq_enter(); #ifdef CONFIG_NO_HZ - if (idle_cpu(smp_processor_id())) + if (idle_cpu(cpu)) tick_nohz_update_jiffies(); #endif } diff --git a/kernel/spinlock.c b/kernel/spinlock.c index cd72424c266..ae28c824512 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock); * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are * not re-enabled during lock-acquire (which the preempt-spin-ops do): */ -#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \ - defined(CONFIG_DEBUG_LOCK_ALLOC) +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) void __lockfunc _read_lock(rwlock_t *lock) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4bc8e48434a..357b68ba23e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -53,6 +53,7 @@ #ifdef CONFIG_X86 #include <asm/nmi.h> #include <asm/stacktrace.h> +#include <asm/io.h> #endif static int deprecated_sysctl_warning(struct __sysctl_args *args); @@ -727,6 +728,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "io_delay_type", + .data = &io_delay_type, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c new file mode 100644 index 00000000000..88cdb109e13 --- /dev/null +++ b/kernel/test_kprobes.c @@ -0,0 +1,216 @@ +/* + * test_kprobes.c - simple sanity test for *probes + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/random.h> + +#define div_factor 3 + +static u32 rand1, preh_val, posth_val, jph_val; +static int errors, handler_errors, num_tests; + +static noinline u32 kprobe_target(u32 value) +{ + /* + * gcc ignores noinline on some architectures unless we stuff + * sufficient lard into the function. The get_kprobe() here is + * just for that. + * + * NOTE: We aren't concerned about the correctness of get_kprobe() + * here; hence, this call is neither under !preempt nor with the + * kprobe_mutex held. This is fine(tm) + */ + if (get_kprobe((void *)0xdeadbeef)) + printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n"); + + return (value / div_factor); +} + +static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + preh_val = (rand1 / div_factor); + return 0; +} + +static void kp_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + if (preh_val != (rand1 / div_factor)) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in post_handler\n"); + } + posth_val = preh_val + div_factor; +} + +static struct kprobe kp = { + .symbol_name = "kprobe_target", + .pre_handler = kp_pre_handler, + .post_handler = kp_post_handler +}; + +static int test_kprobe(void) +{ + int ret; + + ret = register_kprobe(&kp); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_kprobe returned %d\n", ret); + return ret; + } + + ret = kprobe_target(rand1); + unregister_kprobe(&kp); + + if (preh_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe pre_handler not called\n"); + handler_errors++; + } + + if (posth_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe post_handler not called\n"); + handler_errors++; + } + + return 0; +} + +static u32 j_kprobe_target(u32 value) +{ + if (value != rand1) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in jprobe handler\n"); + } + + jph_val = rand1; + jprobe_return(); + return 0; +} + +static struct jprobe jp = { + .entry = j_kprobe_target, + .kp.symbol_name = "kprobe_target" +}; + +static int test_jprobe(void) +{ + int ret; + + ret = register_jprobe(&jp); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_jprobe returned %d\n", ret); + return ret; + } + + ret = kprobe_target(rand1); + unregister_jprobe(&jp); + if (jph_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "jprobe handler not called\n"); + handler_errors++; + } + + return 0; +} + +#ifdef CONFIG_KRETPROBES +static u32 krph_val; + +static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long ret = regs_return_value(regs); + + if (ret != (rand1 / div_factor)) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in kretprobe handler\n"); + } + + krph_val = (rand1 / div_factor); + return 0; +} + +static struct kretprobe rp = { + .handler = return_handler, + .kp.symbol_name = "kprobe_target" +}; + +static int test_kretprobe(void) +{ + int ret; + + ret = register_kretprobe(&rp); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_kretprobe returned %d\n", ret); + return ret; + } + + ret = kprobe_target(rand1); + unregister_kretprobe(&rp); + if (krph_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kretprobe handler not called\n"); + handler_errors++; + } + + return 0; +} +#endif /* CONFIG_KRETPROBES */ + +int init_test_probes(void) +{ + int ret; + + do { + rand1 = random32(); + } while (rand1 <= div_factor); + + printk(KERN_INFO "Kprobe smoke test started\n"); + num_tests++; + ret = test_kprobe(); + if (ret < 0) + errors++; + + num_tests++; + ret = test_jprobe(); + if (ret < 0) + errors++; + +#ifdef CONFIG_KRETPROBES + num_tests++; + ret = test_kretprobe(); + if (ret < 0) + errors++; +#endif /* CONFIG_KRETPROBES */ + + if (errors) + printk(KERN_ERR "BUG: Kprobe smoke test: %d out of " + "%d tests failed\n", errors, num_tests); + else if (handler_errors) + printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) " + "running handlers\n", handler_errors); + else + printk(KERN_INFO "Kprobe smoke test passed successfully\n"); + + return 0; +} diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 5fb139fef9f..3e59fce6dd4 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -41,6 +41,11 @@ unsigned long clockevent_delta2ns(unsigned long latch, { u64 clc = ((u64) latch << evt->shift); + if (unlikely(!evt->mult)) { + evt->mult = 1; + WARN_ON(1); + } + do_div(clc, evt->mult); if (clc < 1000) clc = 1000; @@ -151,6 +156,14 @@ static void clockevents_notify_released(void) void clockevents_register_device(struct clock_event_device *dev) { BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + /* + * A nsec2cyc multiplicator of 0 is invalid and we'd crash + * on it, so fix it up and emit a warning: + */ + if (unlikely(!dev->mult)) { + dev->mult = 1; + WARN_ON(1); + } spin_lock(&clockevents_lock); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 8d6125ad2cf..6e9259a5d50 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -142,8 +142,13 @@ static void clocksource_watchdog(unsigned long data) } if (!list_empty(&watchdog_list)) { - __mod_timer(&watchdog_timer, - watchdog_timer.expires + WATCHDOG_INTERVAL); + /* Cycle through CPUs to check if the CPUs stay synchronized to + * each other. */ + int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map); + if (next_cpu >= NR_CPUS) + next_cpu = first_cpu(cpu_online_map); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); } spin_unlock(&watchdog_lock); } @@ -165,7 +170,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) if (!started && watchdog) { watchdog_last = watchdog->read(); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; - add_timer(&watchdog_timer); + add_timer_on(&watchdog_timer, first_cpu(cpu_online_map)); } } else { if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) @@ -175,7 +180,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) if (watchdog) del_timer(&watchdog_timer); watchdog = cs; - init_timer(&watchdog_timer); + init_timer_deferrable(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; /* Reset watchdog cycles */ @@ -186,7 +191,8 @@ static void clocksource_check_watchdog(struct clocksource *cs) watchdog_last = watchdog->read(); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; - add_timer(&watchdog_timer); + add_timer_on(&watchdog_timer, + first_cpu(cpu_online_map)); } } } @@ -331,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating) spin_unlock_irqrestore(&clocksource_lock, flags); } +/** + * clocksource_unregister - remove a registered clocksource + */ +void clocksource_unregister(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&clocksource_lock, flags); + list_del(&cs->list); + if (clocksource_override == cs) + clocksource_override = NULL; + next_clocksource = select_clocksource(); + spin_unlock_irqrestore(&clocksource_lock, flags); +} + #ifdef CONFIG_SYSFS /** * sysfs_show_current_clocksources - sysfs interface for current clocksource diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 5b86698faa0..e1bd50cbbf5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -126,9 +126,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) /* * Broadcast the event to the cpus, which are set in the mask */ -int tick_do_broadcast(cpumask_t mask) +static void tick_do_broadcast(cpumask_t mask) { - int ret = 0, cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct tick_device *td; /* @@ -138,7 +138,6 @@ int tick_do_broadcast(cpumask_t mask) cpu_clear(cpu, mask); td = &per_cpu(tick_cpu_device, cpu); td->evtdev->event_handler(td->evtdev); - ret = 1; } if (!cpus_empty(mask)) { @@ -151,9 +150,7 @@ int tick_do_broadcast(cpumask_t mask) cpu = first_cpu(mask); td = &per_cpu(tick_cpu_device, cpu); td->evtdev->broadcast(mask); - ret = 1; } - return ret; } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index bb13f272490..f13f2b7f4fd 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -70,8 +70,6 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) * Broadcasting support */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern int tick_do_broadcast(cpumask_t mask); - extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern int tick_check_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1a21b6fdb67..63f24b55069 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -9,7 +9,7 @@ * * Started by: Thomas Gleixner and Ingo Molnar * - * For licencing details see kernel-base/COPYING + * Distribute under GPLv2. */ #include <linux/cpu.h> #include <linux/err.h> @@ -143,6 +143,44 @@ void tick_nohz_update_jiffies(void) local_irq_restore(flags); } +void tick_nohz_stop_idle(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + if (ts->idle_active) { + ktime_t now, delta; + now = ktime_get(); + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + ts->idle_active = 0; + } +} + +static ktime_t tick_nohz_start_idle(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, delta; + + now = ktime_get(); + if (ts->idle_active) { + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + } + ts->idle_entrytime = now; + ts->idle_active = 1; + return now; +} + +u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + *last_update_time = ktime_to_us(ts->idle_lastupdate); + return ktime_to_us(ts->idle_sleeptime); +} + /** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task * @@ -155,13 +193,14 @@ void tick_nohz_stop_sched_tick(void) unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; unsigned long rt_jiffies; struct tick_sched *ts; - ktime_t last_update, expires, now, delta; + ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; int cpu; local_irq_save(flags); cpu = smp_processor_id(); + now = tick_nohz_start_idle(cpu); ts = &per_cpu(tick_cpu_sched, cpu); /* @@ -193,19 +232,7 @@ void tick_nohz_stop_sched_tick(void) } } - now = ktime_get(); - /* - * When called from irq_exit we need to account the idle sleep time - * correctly. - */ - if (ts->tick_stopped) { - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - } - - ts->idle_entrytime = now; ts->idle_calls++; - /* Read jiffies and the time when jiffies were updated last */ do { seq = read_seqbegin(&xtime_lock); @@ -296,7 +323,7 @@ void tick_nohz_stop_sched_tick(void) /* Check, if the timer was already in the past */ if (hrtimer_active(&ts->sched_timer)) goto out; - } else if(!tick_program_event(expires, 0)) + } else if (!tick_program_event(expires, 0)) goto out; /* * We are past the event already. So we crossed a @@ -337,23 +364,22 @@ void tick_nohz_restart_sched_tick(void) int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long ticks; - ktime_t now, delta; + ktime_t now; - if (!ts->tick_stopped) + local_irq_disable(); + tick_nohz_stop_idle(cpu); + + if (!ts->tick_stopped) { + local_irq_enable(); return; + } /* Update jiffies first */ - now = ktime_get(); - - local_irq_disable(); select_nohz_load_balancer(0); + now = ktime_get(); tick_do_update_jiffies64(now); cpu_clear(cpu, nohz_cpu_mask); - /* Account the idle time */ - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick @@ -507,7 +533,7 @@ static inline void tick_nohz_switch_to_nohz(void) { } */ #ifdef CONFIG_HIGH_RES_TIMERS /* - * We rearm the timer until we get disabled by the idle code + * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled and timer->base->cpu_base->lock held. */ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ab46ae8c062..092a2366b5a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -82,13 +82,12 @@ static inline s64 __get_nsec_offset(void) } /** - * __get_realtime_clock_ts - Returns the time of day in a timespec + * getnstimeofday - Returns the time of day in a timespec * @ts: pointer to the timespec to be set * - * Returns the time of day in a timespec. Used by - * do_gettimeofday() and get_realtime_clock_ts(). + * Returns the time of day in a timespec. */ -static inline void __get_realtime_clock_ts(struct timespec *ts) +void getnstimeofday(struct timespec *ts) { unsigned long seq; s64 nsecs; @@ -104,30 +103,19 @@ static inline void __get_realtime_clock_ts(struct timespec *ts) timespec_add_ns(ts, nsecs); } -/** - * getnstimeofday - Returns the time of day in a timespec - * @ts: pointer to the timespec to be set - * - * Returns the time of day in a timespec. - */ -void getnstimeofday(struct timespec *ts) -{ - __get_realtime_clock_ts(ts); -} - EXPORT_SYMBOL(getnstimeofday); /** * do_gettimeofday - Returns the time of day in a timeval * @tv: pointer to the timeval to be set * - * NOTE: Users should be converted to using get_realtime_clock_ts() + * NOTE: Users should be converted to using getnstimeofday() */ void do_gettimeofday(struct timeval *tv) { struct timespec now; - __get_realtime_clock_ts(&now); + getnstimeofday(&now); tv->tv_sec = now.tv_sec; tv->tv_usec = now.tv_nsec/1000; } @@ -198,7 +186,8 @@ static void change_clocksource(void) clock->error = 0; clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); + clocksource_calculate_interval(clock, + (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); tick_clock_notify(); @@ -255,7 +244,8 @@ void __init timekeeping_init(void) ntp_clear(); clock = clocksource_get_next(); - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); + clocksource_calculate_interval(clock, + (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); clock->cycle_last = clocksource_read(clock); xtime.tv_sec = sec; diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index c36bb7ed030..417da8c5bc7 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -26,7 +26,7 @@ * the pid and cmdline from the owner process if applicable. * * Start/stop data collection: - * # echo 1[0] >/proc/timer_stats + * # echo [1|0] >/proc/timer_stats * * Display the information collected so far: * # cat /proc/timer_stats diff --git a/kernel/timer.c b/kernel/timer.c index f739dfb539c..23f7ead78fa 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -58,59 +58,57 @@ EXPORT_SYMBOL(jiffies_64); #define TVN_MASK (TVN_SIZE - 1) #define TVR_MASK (TVR_SIZE - 1) -typedef struct tvec_s { +struct tvec { struct list_head vec[TVN_SIZE]; -} tvec_t; +}; -typedef struct tvec_root_s { +struct tvec_root { struct list_head vec[TVR_SIZE]; -} tvec_root_t; +}; -struct tvec_t_base_s { +struct tvec_base { spinlock_t lock; struct timer_list *running_timer; unsigned long timer_jiffies; - tvec_root_t tv1; - tvec_t tv2; - tvec_t tv3; - tvec_t tv4; - tvec_t tv5; + struct tvec_root tv1; + struct tvec tv2; + struct tvec tv3; + struct tvec tv4; + struct tvec tv5; } ____cacheline_aligned; -typedef struct tvec_t_base_s tvec_base_t; - -tvec_base_t boot_tvec_bases; +struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); -static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; +static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; /* - * Note that all tvec_bases is 2 byte aligned and lower bit of + * Note that all tvec_bases are 2 byte aligned and lower bit of * base in timer_list is guaranteed to be zero. Use the LSB for * the new flag to indicate whether the timer is deferrable */ #define TBASE_DEFERRABLE_FLAG (0x1) /* Functions below help us manage 'deferrable' flag */ -static inline unsigned int tbase_get_deferrable(tvec_base_t *base) +static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); } -static inline tvec_base_t *tbase_get_base(tvec_base_t *base) +static inline struct tvec_base *tbase_get_base(struct tvec_base *base) { - return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); + return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); } static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | + timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | TBASE_DEFERRABLE_FLAG)); } static inline void -timer_set_base(struct timer_list *timer, tvec_base_t *new_base) +timer_set_base(struct timer_list *timer, struct tvec_base *new_base) { - timer->base = (tvec_base_t *)((unsigned long)(new_base) | + timer->base = (struct tvec_base *)((unsigned long)(new_base) | tbase_get_deferrable(timer->base)); } @@ -246,7 +244,7 @@ unsigned long round_jiffies_relative(unsigned long j) EXPORT_SYMBOL_GPL(round_jiffies_relative); -static inline void set_running_timer(tvec_base_t *base, +static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) { #ifdef CONFIG_SMP @@ -254,7 +252,7 @@ static inline void set_running_timer(tvec_base_t *base, #endif } -static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; unsigned long idx = expires - base->timer_jiffies; @@ -371,14 +369,14 @@ static inline void detach_timer(struct timer_list *timer, * possible to set timer->base = NULL and drop the lock: the timer remains * locked. */ -static tvec_base_t *lock_timer_base(struct timer_list *timer, +static struct tvec_base *lock_timer_base(struct timer_list *timer, unsigned long *flags) __acquires(timer->base->lock) { - tvec_base_t *base; + struct tvec_base *base; for (;;) { - tvec_base_t *prelock_base = timer->base; + struct tvec_base *prelock_base = timer->base; base = tbase_get_base(prelock_base); if (likely(base != NULL)) { spin_lock_irqsave(&base->lock, *flags); @@ -393,7 +391,7 @@ static tvec_base_t *lock_timer_base(struct timer_list *timer, int __mod_timer(struct timer_list *timer, unsigned long expires) { - tvec_base_t *base, *new_base; + struct tvec_base *base, *new_base; unsigned long flags; int ret = 0; @@ -445,7 +443,7 @@ EXPORT_SYMBOL(__mod_timer); */ void add_timer_on(struct timer_list *timer, int cpu) { - tvec_base_t *base = per_cpu(tvec_bases, cpu); + struct tvec_base *base = per_cpu(tvec_bases, cpu); unsigned long flags; timer_stats_timer_set_start_info(timer); @@ -508,7 +506,7 @@ EXPORT_SYMBOL(mod_timer); */ int del_timer(struct timer_list *timer) { - tvec_base_t *base; + struct tvec_base *base; unsigned long flags; int ret = 0; @@ -539,7 +537,7 @@ EXPORT_SYMBOL(del_timer); */ int try_to_del_timer_sync(struct timer_list *timer) { - tvec_base_t *base; + struct tvec_base *base; unsigned long flags; int ret = -1; @@ -591,7 +589,7 @@ int del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(del_timer_sync); #endif -static int cascade(tvec_base_t *base, tvec_t *tv, int index) +static int cascade(struct tvec_base *base, struct tvec *tv, int index) { /* cascade all the timers from tv up one level */ struct timer_list *timer, *tmp; @@ -620,7 +618,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) * This function cascades all vectors and executes all expired timer * vectors. */ -static inline void __run_timers(tvec_base_t *base) +static inline void __run_timers(struct tvec_base *base) { struct timer_list *timer; @@ -657,7 +655,7 @@ static inline void __run_timers(tvec_base_t *base) int preempt_count = preempt_count(); fn(data); if (preempt_count != preempt_count()) { - printk(KERN_WARNING "huh, entered %p " + printk(KERN_ERR "huh, entered %p " "with preempt_count %08x, exited" " with %08x?\n", fn, preempt_count, @@ -678,13 +676,13 @@ static inline void __run_timers(tvec_base_t *base) * is used on S/390 to stop all activity when a cpus is idle. * This functions needs to be called disabled. */ -static unsigned long __next_timer_interrupt(tvec_base_t *base) +static unsigned long __next_timer_interrupt(struct tvec_base *base) { unsigned long timer_jiffies = base->timer_jiffies; unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; int index, slot, array, found = 0; struct timer_list *nte; - tvec_t *varray[4]; + struct tvec *varray[4]; /* Look for timer events in tv1. */ index = slot = timer_jiffies & TVR_MASK; @@ -716,7 +714,7 @@ cascade: varray[3] = &base->tv5; for (array = 0; array < 4; array++) { - tvec_t *varp = varray[array]; + struct tvec *varp = varray[array]; index = slot = timer_jiffies & TVN_MASK; do { @@ -795,7 +793,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, */ unsigned long get_next_timer_interrupt(unsigned long now) { - tvec_base_t *base = __get_cpu_var(tvec_bases); + struct tvec_base *base = __get_cpu_var(tvec_bases); unsigned long expires; spin_lock(&base->lock); @@ -894,7 +892,7 @@ static inline void calc_load(unsigned long ticks) */ static void run_timer_softirq(struct softirq_action *h) { - tvec_base_t *base = __get_cpu_var(tvec_bases); + struct tvec_base *base = __get_cpu_var(tvec_bases); hrtimer_run_pending(); @@ -1223,7 +1221,7 @@ static struct lock_class_key base_lock_keys[NR_CPUS]; static int __cpuinit init_timers_cpu(int cpu) { int j; - tvec_base_t *base; + struct tvec_base *base; static char __cpuinitdata tvec_base_done[NR_CPUS]; if (!tvec_base_done[cpu]) { @@ -1278,7 +1276,7 @@ static int __cpuinit init_timers_cpu(int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) +static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) { struct timer_list *timer; @@ -1292,8 +1290,8 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) static void __cpuinit migrate_timers(int cpu) { - tvec_base_t *old_base; - tvec_base_t *new_base; + struct tvec_base *old_base; + struct tvec_base *new_base; int i; BUG_ON(cpu_online(cpu)); |