diff options
Diffstat (limited to 'kernel')
74 files changed, 632 insertions, 291 deletions
diff --git a/kernel/async.c b/kernel/async.c index 27235f5de19..15319d6c18f 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -56,6 +56,7 @@ asynchronous and synchronous parts of the kernel. #include <linux/init.h> #include <linux/kthread.h> #include <linux/delay.h> +#include <linux/slab.h> #include <asm/atomic.h> static async_cookie_t next_cookie = 1; diff --git a/kernel/audit.c b/kernel/audit.c index 78f7f86aa23..c71bd26631a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -46,6 +46,7 @@ #include <asm/atomic.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/err.h> #include <linux/kthread.h> diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 028e85663f2..46a57b57a33 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -3,6 +3,7 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/kthread.h> +#include <linux/slab.h> struct audit_tree; struct audit_chunk; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index cc7e87936cb..8df43696f4b 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -27,6 +27,7 @@ #include <linux/namei.h> #include <linux/netlink.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/inotify.h> #include <linux/security.h> #include "audit.h" diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a70604047f3..ce08041f578 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -27,6 +27,7 @@ #include <linux/namei.h> #include <linux/netlink.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/security.h> #include "audit.h" diff --git a/kernel/auditsc.c b/kernel/auditsc.c index f3a461c0970..3828ad5fb8f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -49,6 +49,7 @@ #include <linux/namei.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/socket.h> #include <linux/mqueue.h> @@ -1893,7 +1894,7 @@ static int audit_inc_name_count(struct audit_context *context, { if (context->name_count >= AUDIT_NAMES) { if (inode) - printk(KERN_DEBUG "name_count maxed, losing inode data: " + printk(KERN_DEBUG "audit: name_count maxed, losing inode data: " "dev=%02x:%02x, inode=%lu\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ef909a32975..3a53c771e50 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -27,7 +27,6 @@ */ #include <linux/cgroup.h> -#include <linux/module.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/fs.h> @@ -1647,7 +1646,9 @@ static inline struct cftype *__d_cft(struct dentry *dentry) int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) { char *start; - struct dentry *dentry = rcu_dereference(cgrp->dentry); + struct dentry *dentry = rcu_dereference_check(cgrp->dentry, + rcu_read_lock_held() || + cgroup_lock_is_held()); if (!dentry || cgrp == dummytop) { /* @@ -1663,13 +1664,17 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) *--start = '\0'; for (;;) { int len = dentry->d_name.len; + if ((start -= len) < buf) return -ENAMETOOLONG; - memcpy(start, cgrp->dentry->d_name.name, len); + memcpy(start, dentry->d_name.name, len); cgrp = cgrp->parent; if (!cgrp) break; - dentry = rcu_dereference(cgrp->dentry); + + dentry = rcu_dereference_check(cgrp->dentry, + rcu_read_lock_held() || + cgroup_lock_is_held()); if (!cgrp->parent) continue; if (--start < buf) @@ -4556,13 +4561,13 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, { int subsys_id, i, depth = 0; struct cgroup_subsys_state *parent_css, *child_css; - struct css_id *child_id, *parent_id = NULL; + struct css_id *child_id, *parent_id; subsys_id = ss->subsys_id; parent_css = parent->subsys[subsys_id]; child_css = child->subsys[subsys_id]; - depth = css_depth(parent_css) + 1; parent_id = parent_css->id; + depth = parent_id->depth; child_id = get_new_cssid(ss, depth); if (IS_ERR(child_id)) diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 59e9ef6aab4..e5c0244962b 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -15,6 +15,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/uaccess.h> @@ -47,17 +48,20 @@ static inline struct freezer *task_freezer(struct task_struct *task) struct freezer, css); } -int cgroup_frozen(struct task_struct *task) +int cgroup_freezing_or_frozen(struct task_struct *task) { struct freezer *freezer; enum freezer_state state; task_lock(task); freezer = task_freezer(task); - state = freezer->state; + if (!freezer->css.cgroup->parent) + state = CGROUP_THAWED; /* root cgroup can't be frozen */ + else + state = freezer->state; task_unlock(task); - return state == CGROUP_FROZEN; + return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); } /* @@ -201,9 +205,12 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) * No lock is needed, since the task isn't on tasklist yet, * so it can't be moved to another cgroup, which means the * freezer won't be removed and will be valid during this - * function call. + * function call. Nevertheless, apply RCU read-side critical + * section to suppress RCU lockdep false positives. */ + rcu_read_lock(); freezer = task_freezer(task); + rcu_read_unlock(); /* * The root cgroup is non-freezable, so we can skip the diff --git a/kernel/compat.c b/kernel/compat.c index f6c204f07ea..7f40e9275fd 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -25,6 +25,7 @@ #include <linux/posix-timers.h> #include <linux/times.h> #include <linux/ptrace.h> +#include <linux/gfp.h> #include <asm/uaccess.h> diff --git a/kernel/cpu.c b/kernel/cpu.c index f8cced2692b..25bba73b1be 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -14,6 +14,7 @@ #include <linux/kthread.h> #include <linux/stop_machine.h> #include <linux/mutex.h> +#include <linux/gfp.h> #ifdef CONFIG_SMP /* Serializes the updates to cpu_online_mask, cpu_present_mask */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ba401fab459..d10946748ec 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -920,9 +920,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * call to guarantee_online_mems(), as we know no one is changing * our task's cpuset. * - * Hold callback_mutex around the two modifications of our tasks - * mems_allowed to synchronize with cpuset_mems_allowed(). - * * While the mm_struct we are migrating is typically from some * other task, the task_struct mems_allowed that we are hacking * is for our current task, which must allocate new pages for that @@ -973,15 +970,20 @@ static void cpuset_change_nodemask(struct task_struct *p, struct cpuset *cs; int migrate; const nodemask_t *oldmem = scan->data; - nodemask_t newmems; + NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL); + + if (!newmems) + return; cs = cgroup_cs(scan->cg); - guarantee_online_mems(cs, &newmems); + guarantee_online_mems(cs, newmems); task_lock(p); - cpuset_change_task_nodemask(p, &newmems); + cpuset_change_task_nodemask(p, newmems); task_unlock(p); + NODEMASK_FREE(newmems); + mm = get_task_mm(p); if (!mm) return; @@ -1051,16 +1053,21 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { - nodemask_t oldmem; + NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL); int retval; struct ptr_heap heap; + if (!oldmem) + return -ENOMEM; + /* * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; * it's read-only */ - if (cs == &top_cpuset) - return -EACCES; + if (cs == &top_cpuset) { + retval = -EACCES; + goto done; + } /* * An empty mems_allowed is ok iff there are no tasks in the cpuset. @@ -1076,11 +1083,13 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, goto done; if (!nodes_subset(trialcs->mems_allowed, - node_states[N_HIGH_MEMORY])) - return -EINVAL; + node_states[N_HIGH_MEMORY])) { + retval = -EINVAL; + goto done; + } } - oldmem = cs->mems_allowed; - if (nodes_equal(oldmem, trialcs->mems_allowed)) { + *oldmem = cs->mems_allowed; + if (nodes_equal(*oldmem, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; } @@ -1096,10 +1105,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, cs->mems_allowed = trialcs->mems_allowed; mutex_unlock(&callback_mutex); - update_tasks_nodemask(cs, &oldmem, &heap); + update_tasks_nodemask(cs, oldmem, &heap); heap_free(&heap); done: + NODEMASK_FREE(oldmem); return retval; } @@ -1384,40 +1394,47 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *oldcont, struct task_struct *tsk, bool threadgroup) { - nodemask_t from, to; struct mm_struct *mm; struct cpuset *cs = cgroup_cs(cont); struct cpuset *oldcs = cgroup_cs(oldcont); + NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL); + NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL); + + if (from == NULL || to == NULL) + goto alloc_fail; if (cs == &top_cpuset) { cpumask_copy(cpus_attach, cpu_possible_mask); - to = node_possible_map; } else { guarantee_online_cpus(cs, cpus_attach); - guarantee_online_mems(cs, &to); } + guarantee_online_mems(cs, to); /* do per-task migration stuff possibly for each in the threadgroup */ - cpuset_attach_task(tsk, &to, cs); + cpuset_attach_task(tsk, to, cs); if (threadgroup) { struct task_struct *c; rcu_read_lock(); list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { - cpuset_attach_task(c, &to, cs); + cpuset_attach_task(c, to, cs); } rcu_read_unlock(); } /* change mm; only needs to be done once even if threadgroup */ - from = oldcs->mems_allowed; - to = cs->mems_allowed; + *from = oldcs->mems_allowed; + *to = cs->mems_allowed; mm = get_task_mm(tsk); if (mm) { - mpol_rebind_mm(mm, &to); + mpol_rebind_mm(mm, to); if (is_memory_migrate(cs)) - cpuset_migrate_mm(mm, &from, &to); + cpuset_migrate_mm(mm, from, to); mmput(mm); } + +alloc_fail: + NODEMASK_FREE(from); + NODEMASK_FREE(to); } /* The various types of files and directories in a cpuset file system */ @@ -1562,13 +1579,21 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) { - nodemask_t mask; + NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL); + int retval; + + if (mask == NULL) + return -ENOMEM; mutex_lock(&callback_mutex); - mask = cs->mems_allowed; + *mask = cs->mems_allowed; mutex_unlock(&callback_mutex); - return nodelist_scnprintf(page, PAGE_SIZE, mask); + retval = nodelist_scnprintf(page, PAGE_SIZE, *mask); + + NODEMASK_FREE(mask); + + return retval; } static ssize_t cpuset_common_file_read(struct cgroup *cont, @@ -1997,7 +2022,10 @@ static void scan_for_empty_cpusets(struct cpuset *root) struct cpuset *cp; /* scans cpusets being updated */ struct cpuset *child; /* scans child cpusets of cp */ struct cgroup *cont; - nodemask_t oldmems; + NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); + + if (oldmems == NULL) + return; list_add_tail((struct list_head *)&root->stack_list, &queue); @@ -2014,7 +2042,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) continue; - oldmems = cp->mems_allowed; + *oldmems = cp->mems_allowed; /* Remove offline cpus and mems from this cpuset. */ mutex_lock(&callback_mutex); @@ -2030,9 +2058,10 @@ static void scan_for_empty_cpusets(struct cpuset *root) remove_tasks_in_empty_cpuset(cp); else { update_tasks_cpumask(cp, NULL); - update_tasks_nodemask(cp, &oldmems, NULL); + update_tasks_nodemask(cp, oldmems, NULL); } } + NODEMASK_FREE(oldmems); } /* @@ -2090,20 +2119,33 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, static int cpuset_track_online_nodes(struct notifier_block *self, unsigned long action, void *arg) { + NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); + + if (oldmems == NULL) + return NOTIFY_DONE; + cgroup_lock(); switch (action) { case MEM_ONLINE: - case MEM_OFFLINE: + *oldmems = top_cpuset.mems_allowed; mutex_lock(&callback_mutex); top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; mutex_unlock(&callback_mutex); - if (action == MEM_OFFLINE) - scan_for_empty_cpusets(&top_cpuset); + update_tasks_nodemask(&top_cpuset, oldmems, NULL); + break; + case MEM_OFFLINE: + /* + * needn't update top_cpuset.mems_allowed explicitly because + * scan_for_empty_cpusets() will update it. + */ + scan_for_empty_cpusets(&top_cpuset); break; default: break; } cgroup_unlock(); + + NODEMASK_FREE(oldmems); return NOTIFY_OK; } #endif diff --git a/kernel/cred.c b/kernel/cred.c index 1ed8ca18790..62af1816c23 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> #include <linux/cred.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/key.h> #include <linux/keyctl.h> @@ -364,7 +365,7 @@ struct cred *prepare_usermodehelper_creds(void) new = kmem_cache_alloc(cred_jar, GFP_ATOMIC); if (!new) - return NULL; + goto free_tgcred; kdebug("prepare_usermodehelper_creds() alloc %p", new); @@ -398,6 +399,12 @@ struct cred *prepare_usermodehelper_creds(void) error: put_cred(new); return NULL; + +free_tgcred: +#ifdef CONFIG_KEYS + kfree(tgcred); +#endif + return NULL; } /* @@ -786,8 +793,6 @@ bool creds_are_invalid(const struct cred *cred) { if (cred->magic != CRED_MAGIC) return true; - if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers)) - return true; #ifdef CONFIG_SECURITY_SELINUX if (selinux_is_enabled()) { if ((unsigned long) cred->security < PAGE_SIZE) diff --git a/kernel/early_res.c b/kernel/early_res.c index 3cb2c661bb7..31aa9332ef3 100644 --- a/kernel/early_res.c +++ b/kernel/early_res.c @@ -333,6 +333,12 @@ void __init free_early_partial(u64 start, u64 end) struct early_res *r; int i; + if (start == end) + return; + + if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end)) + return; + try_next: i = find_overlapped_early(start, end); if (i >= max_early_res) diff --git a/kernel/exit.c b/kernel/exit.c index cce59cb5ee6..7f2683a10ac 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -953,7 +953,8 @@ NORET_TYPE void do_exit(long code) acct_update_integrals(tsk); /* sync mm's RSS info before statistics gathering */ - sync_mm_rss(tsk, tsk->mm); + if (tsk->mm) + sync_mm_rss(tsk, tsk->mm); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); diff --git a/kernel/fork.c b/kernel/fork.c index 4799c5f0e6d..44b0791b0a2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1052,6 +1052,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; #endif +#if defined(SPLIT_RSS_COUNTING) + memset(&p->rss_stat, 0, sizeof(p->rss_stat)); +#endif p->default_timer_slack_ns = current->timer_slack_ns; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 42ec11b2af8..b7091d5ca2f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -359,6 +359,23 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) if (desc->chip->ack) desc->chip->ack(irq); } + desc->status |= IRQ_MASKED; +} + +static inline void mask_irq(struct irq_desc *desc, int irq) +{ + if (desc->chip->mask) { + desc->chip->mask(irq); + desc->status |= IRQ_MASKED; + } +} + +static inline void unmask_irq(struct irq_desc *desc, int irq) +{ + if (desc->chip->unmask) { + desc->chip->unmask(irq); + desc->status &= ~IRQ_MASKED; + } } /* @@ -484,10 +501,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) raw_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; - if (unlikely(desc->status & IRQ_ONESHOT)) - desc->status |= IRQ_MASKED; - else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) - desc->chip->unmask(irq); + if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT))) + unmask_irq(desc, irq); out_unlock: raw_spin_unlock(&desc->lock); } @@ -524,8 +539,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) action = desc->action; if (unlikely(!action || (desc->status & IRQ_DISABLED))) { desc->status |= IRQ_PENDING; - if (desc->chip->mask) - desc->chip->mask(irq); + mask_irq(desc, irq); goto out; } @@ -593,7 +607,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) irqreturn_t action_ret; if (unlikely(!action)) { - desc->chip->mask(irq); + mask_irq(desc, irq); goto out_unlock; } @@ -605,8 +619,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) if (unlikely((desc->status & (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_MASKED))) { - desc->chip->unmask(irq); - desc->status &= ~IRQ_MASKED; + unmask_irq(desc, irq); } desc->status &= ~IRQ_PENDING; @@ -716,7 +729,7 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, __set_irq_handler(irq, handle, 0, name); } -void __init set_irq_noprobe(unsigned int irq) +void set_irq_noprobe(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -731,7 +744,7 @@ void __init set_irq_noprobe(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } -void __init set_irq_probe(unsigned int irq) +void set_irq_probe(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index eb6078ca60c..704e488730a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -382,6 +382,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; + unsigned long flags; if (!desc) return 0; @@ -389,11 +390,14 @@ int can_request_irq(unsigned int irq, unsigned long irqflags) if (desc->status & IRQ_NOREQUEST) return 0; + raw_spin_lock_irqsave(&desc->lock, flags); action = desc->action; if (action) if (irqflags & action->flags & IRQF_SHARED) action = NULL; + raw_spin_unlock_irqrestore(&desc->lock, flags); + return !action; } @@ -483,8 +487,26 @@ static int irq_wait_for_interrupt(struct irqaction *action) */ static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) { +again: chip_bus_lock(irq, desc); raw_spin_lock_irq(&desc->lock); + + /* + * Implausible though it may be we need to protect us against + * the following scenario: + * + * The thread is faster done than the hard interrupt handler + * on the other CPU. If we unmask the irq line then the + * interrupt can come in again and masks the line, leaves due + * to IRQ_INPROGRESS and the irq line is masked forever. + */ + if (unlikely(desc->status & IRQ_INPROGRESS)) { + raw_spin_unlock_irq(&desc->lock); + chip_bus_sync_unlock(irq, desc); + cpu_relax(); + goto again; + } + if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { desc->status &= ~IRQ_MASKED; desc->chip->unmask(irq); @@ -735,6 +757,16 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (new->flags & IRQF_ONESHOT) desc->status |= IRQ_ONESHOT; + /* + * Force MSI interrupts to run with interrupts + * disabled. The multi vector cards can cause stack + * overflows due to nested interrupts when enough of + * them are directed to a core and fire at the same + * time. + */ + if (desc->msi_desc) + new->flags |= IRQF_DISABLED; + if (!(desc->status & IRQ_NOAUTOEN)) { desc->depth = 0; desc->status &= ~IRQ_DISABLED; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 963559dbd85..65d3845665a 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -6,6 +6,7 @@ */ #include <linux/irq.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 6f50eccc79c..7a6eb04ef6b 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -7,6 +7,7 @@ */ #include <linux/irq.h> +#include <linux/gfp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/interrupt.h> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 8e5288a8a35..13aff293f4d 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -21,6 +21,7 @@ #include <linux/sched.h> /* for cond_resched */ #include <linux/mm.h> #include <linux/ctype.h> +#include <linux/slab.h> #include <asm/sections.h> diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 761fdd2b303..11f3515ca83 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -69,9 +69,16 @@ struct kgdb_state { struct pt_regs *linux_regs; }; +/* Exception state values */ +#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */ +#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */ +#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */ +#define DCPU_SSTEP 0x8 /* CPU is single stepping */ + static struct debuggerinfo_struct { void *debuggerinfo; struct task_struct *task; + int exception_state; } kgdb_info[NR_CPUS]; /** @@ -391,27 +398,22 @@ int kgdb_mem2hex(char *mem, char *buf, int count) /* * Copy the binary array pointed to by buf into mem. Fix $, #, and - * 0x7d escaped with 0x7d. Return a pointer to the character after - * the last byte written. + * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success. + * The input buf is overwitten with the result to write to mem. */ static int kgdb_ebin2mem(char *buf, char *mem, int count) { - int err = 0; - char c; + int size = 0; + char *c = buf; while (count-- > 0) { - c = *buf++; - if (c == 0x7d) - c = *buf++ ^ 0x20; - - err = probe_kernel_write(mem, &c, 1); - if (err) - break; - - mem++; + c[size] = *buf++; + if (c[size] == 0x7d) + c[size] = *buf++ ^ 0x20; + size++; } - return err; + return probe_kernel_write(mem, c, size); } /* @@ -563,49 +565,6 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid) } /* - * CPU debug state control: - */ - -#ifdef CONFIG_SMP -static void kgdb_wait(struct pt_regs *regs) -{ - unsigned long flags; - int cpu; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - kgdb_info[cpu].debuggerinfo = regs; - kgdb_info[cpu].task = current; - /* - * Make sure the above info reaches the primary CPU before - * our cpu_in_kgdb[] flag setting does: - */ - smp_wmb(); - atomic_set(&cpu_in_kgdb[cpu], 1); - - /* Disable any cpu specific hw breakpoints */ - kgdb_disable_hw_debug(regs); - - /* Wait till primary CPU is done with debugging */ - while (atomic_read(&passive_cpu_wait[cpu])) - cpu_relax(); - - kgdb_info[cpu].debuggerinfo = NULL; - kgdb_info[cpu].task = NULL; - - /* fix up hardware debug registers on local cpu */ - if (arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - - /* Signal the primary CPU that we are done: */ - atomic_set(&cpu_in_kgdb[cpu], 0); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); - local_irq_restore(flags); -} -#endif - -/* * Some architectures need cache flushes when we set/clear a * breakpoint: */ @@ -1400,34 +1359,13 @@ static int kgdb_reenter_check(struct kgdb_state *ks) return 1; } -/* - * kgdb_handle_exception() - main entry point from a kernel exception - * - * Locking hierarchy: - * interface locks, if any (begin_session) - * kgdb lock (kgdb_active) - */ -int -kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) +static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) { - struct kgdb_state kgdb_var; - struct kgdb_state *ks = &kgdb_var; unsigned long flags; int sstep_tries = 100; int error = 0; int i, cpu; - - ks->cpu = raw_smp_processor_id(); - ks->ex_vector = evector; - ks->signo = signo; - ks->ex_vector = evector; - ks->err_code = ecode; - ks->kgdb_usethreadid = 0; - ks->linux_regs = regs; - - if (kgdb_reenter_check(ks)) - return 0; /* Ouch, double exception ! */ - + int trace_on = 0; acquirelock: /* * Interrupts will be restored by the 'trap return' code, except when @@ -1435,13 +1373,43 @@ acquirelock: */ local_irq_save(flags); - cpu = raw_smp_processor_id(); + cpu = ks->cpu; + kgdb_info[cpu].debuggerinfo = regs; + kgdb_info[cpu].task = current; + /* + * Make sure the above info reaches the primary CPU before + * our cpu_in_kgdb[] flag setting does: + */ + atomic_inc(&cpu_in_kgdb[cpu]); /* - * Acquire the kgdb_active lock: + * CPU will loop if it is a slave or request to become a kgdb + * master cpu and acquire the kgdb_active lock: */ - while (atomic_cmpxchg(&kgdb_active, -1, cpu) != -1) + while (1) { + if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { + if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu) + break; + } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { + if (!atomic_read(&passive_cpu_wait[cpu])) + goto return_normal; + } else { +return_normal: + /* Return to normal operation by executing any + * hw breakpoint fixup. + */ + if (arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + if (trace_on) + tracing_on(); + atomic_dec(&cpu_in_kgdb[cpu]); + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + local_irq_restore(flags); + return 0; + } cpu_relax(); + } /* * For single stepping, try to only enter on the processor @@ -1475,9 +1443,6 @@ acquirelock: if (kgdb_io_ops->pre_exception) kgdb_io_ops->pre_exception(); - kgdb_info[ks->cpu].debuggerinfo = ks->linux_regs; - kgdb_info[ks->cpu].task = current; - kgdb_disable_hw_debug(ks->linux_regs); /* @@ -1486,15 +1451,9 @@ acquirelock: */ if (!kgdb_single_step) { for (i = 0; i < NR_CPUS; i++) - atomic_set(&passive_cpu_wait[i], 1); + atomic_inc(&passive_cpu_wait[i]); } - /* - * spin_lock code is good enough as a barrier so we don't - * need one here: - */ - atomic_set(&cpu_in_kgdb[ks->cpu], 1); - #ifdef CONFIG_SMP /* Signal the other CPUs to enter kgdb_wait() */ if ((!kgdb_single_step) && kgdb_do_roundup) @@ -1518,6 +1477,9 @@ acquirelock: kgdb_single_step = 0; kgdb_contthread = current; exception_level = 0; + trace_on = tracing_is_on(); + if (trace_on) + tracing_off(); /* Talk to debugger with gdbserial protocol */ error = gdb_serial_stub(ks); @@ -1526,13 +1488,11 @@ acquirelock: if (kgdb_io_ops->post_exception) kgdb_io_ops->post_exception(); - kgdb_info[ks->cpu].debuggerinfo = NULL; - kgdb_info[ks->cpu].task = NULL; - atomic_set(&cpu_in_kgdb[ks->cpu], 0); + atomic_dec(&cpu_in_kgdb[ks->cpu]); if (!kgdb_single_step) { for (i = NR_CPUS-1; i >= 0; i--) - atomic_set(&passive_cpu_wait[i], 0); + atomic_dec(&passive_cpu_wait[i]); /* * Wait till all the CPUs have quit * from the debugger. @@ -1551,6 +1511,8 @@ kgdb_restore: else kgdb_sstep_pid = 0; } + if (trace_on) + tracing_on(); /* Free kgdb_active */ atomic_set(&kgdb_active, -1); touch_softlockup_watchdog_sync(); @@ -1560,13 +1522,52 @@ kgdb_restore: return error; } +/* + * kgdb_handle_exception() - main entry point from a kernel exception + * + * Locking hierarchy: + * interface locks, if any (begin_session) + * kgdb lock (kgdb_active) + */ +int +kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) +{ + struct kgdb_state kgdb_var; + struct kgdb_state *ks = &kgdb_var; + int ret; + + ks->cpu = raw_smp_processor_id(); + ks->ex_vector = evector; + ks->signo = signo; + ks->ex_vector = evector; + ks->err_code = ecode; + ks->kgdb_usethreadid = 0; + ks->linux_regs = regs; + + if (kgdb_reenter_check(ks)) + return 0; /* Ouch, double exception ! */ + kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER; + ret = kgdb_cpu_enter(ks, regs); + kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER; + return ret; +} + int kgdb_nmicallback(int cpu, void *regs) { #ifdef CONFIG_SMP + struct kgdb_state kgdb_var; + struct kgdb_state *ks = &kgdb_var; + + memset(ks, 0, sizeof(struct kgdb_state)); + ks->cpu = cpu; + ks->linux_regs = regs; + if (!atomic_read(&cpu_in_kgdb[cpu]) && - atomic_read(&kgdb_active) != cpu && - atomic_read(&cpu_in_kgdb[atomic_read(&kgdb_active)])) { - kgdb_wait((struct pt_regs *)regs); + atomic_read(&kgdb_active) != -1 && + atomic_read(&kgdb_active) != cpu) { + kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; + kgdb_cpu_enter(ks, regs); + kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE; return 0; } #endif @@ -1742,11 +1743,11 @@ EXPORT_SYMBOL_GPL(kgdb_unregister_io_module); */ void kgdb_breakpoint(void) { - atomic_set(&kgdb_setting_breakpoint, 1); + atomic_inc(&kgdb_setting_breakpoint); wmb(); /* Sync point before breakpoint */ arch_kgdb_breakpoint(); wmb(); /* Sync point after breakpoint */ - atomic_set(&kgdb_setting_breakpoint, 0); + atomic_dec(&kgdb_setting_breakpoint); } EXPORT_SYMBOL_GPL(kgdb_breakpoint); diff --git a/kernel/kthread.c b/kernel/kthread.c index 82ed0ea1519..83911c78017 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -219,7 +219,7 @@ int kthreadd(void *unused) set_task_comm(tsk, "kthreadd"); ignore_signals(tsk); set_cpus_allowed_ptr(tsk, cpu_all_mask); - set_mems_allowed(node_possible_map); + set_mems_allowed(node_states[N_HIGH_MEMORY]); current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; diff --git a/kernel/latencytop.c b/kernel/latencytop.c index ca07c5c0c91..877fb306d41 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -56,7 +56,6 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/list.h> -#include <linux/slab.h> #include <linux/stacktrace.h> static DEFINE_SPINLOCK(latency_lock); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c927a549db2..2594e1ce41c 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -43,6 +43,7 @@ #include <linux/ftrace.h> #include <linux/stringify.h> #include <linux/bitops.h> +#include <linux/gfp.h> #include <asm/sections.h> @@ -582,9 +583,6 @@ static int static_obj(void *obj) unsigned long start = (unsigned long) &_stext, end = (unsigned long) &_end, addr = (unsigned long) obj; -#ifdef CONFIG_SMP - int i; -#endif /* * static variable? @@ -595,24 +593,16 @@ static int static_obj(void *obj) if (arch_is_kernel_data(addr)) return 1; -#ifdef CONFIG_SMP /* - * percpu var? + * in-kernel percpu var? */ - for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); - - if ((addr >= start) && (addr < end)) - return 1; - } -#endif + if (is_kernel_percpu_address(addr)) + return 1; /* - * module var? + * module static or percpu var? */ - return is_module_address(addr); + return is_module_address(addr) || is_module_percpu_address(addr); } /* diff --git a/kernel/module.c b/kernel/module.c index c968d3606dc..1016b75b026 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { - void *ptr; + return mod->percpu; +} +static int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); - if (!ptr) + mod->percpu = __alloc_reserved_percpu(size, align); + if (!mod->percpu) { printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); - return ptr; + return -ENOMEM; + } + mod->percpu_size = size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - free_percpu(freeme); + free_percpu(mod->percpu); } static unsigned int find_pcpusec(Elf_Ehdr *hdr, @@ -400,24 +406,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); +} + +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) +{ + struct module *mod; + unsigned int cpu; + + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { + if (!mod->percpu_size) + continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); + + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } + } + } + + preempt_enable(); + return false; } #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static inline int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ + return -ENOMEM; +} +static inline void percpu_modfree(struct module *mod) { - BUG(); } static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -425,12 +469,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} #endif /* CONFIG_SMP */ @@ -473,11 +521,13 @@ static void module_unload_init(struct module *mod) int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for_each_possible_cpu(cpu) - per_cpu_ptr(mod->refptr, cpu)->count = 0; + for_each_possible_cpu(cpu) { + per_cpu_ptr(mod->refptr, cpu)->incs = 0; + per_cpu_ptr(mod->refptr, cpu)->decs = 0; + } /* Hold reference count during initialization. */ - __this_cpu_write(mod->refptr->count, 1); + __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -616,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int total = 0; + unsigned int incs = 0, decs = 0; int cpu; for_each_possible_cpu(cpu) - total += per_cpu_ptr(mod->refptr, cpu)->count; - return total; + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); @@ -798,10 +864,11 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - __this_cpu_dec(module->refptr->count); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->decs)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1400,8 +1467,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); + percpu_modfree(mod); #if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) free_percpu(mod->refptr); @@ -1520,7 +1586,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + secbase = (unsigned long)mod_percpu(mod); else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1954,7 +2020,7 @@ static noinline struct module *load_module(void __user *umod, unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; mm_segment_t old_fs; @@ -2094,15 +2160,11 @@ static noinline struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; + err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, + sechdrs[pcpuindex].sh_addralign); + if (err) goto free_mod; - } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; } /* Determine total sizes, and put offsets in sh_entsize. For now @@ -2317,7 +2379,7 @@ static noinline struct module *load_module(void __user *umod, sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, + percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, @@ -2409,8 +2471,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - if (percpu) - percpu_modfree(percpu); + percpu_modfree(mod); free_mod: kfree(args); kfree(strmap); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 2ab67233ee8..f74e6c00e26 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -13,6 +13,7 @@ * Pavel Emelianov <xemul@openvz.org> */ +#include <linux/slab.h> #include <linux/module.h> #include <linux/nsproxy.h> #include <linux/init_task.h> diff --git a/kernel/padata.c b/kernel/padata.c index 93caf65ff57..fd03513c732 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -25,6 +25,7 @@ #include <linux/padata.h> #include <linux/mutex.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/rcupdate.h> #define MAX_SEQ_NR INT_MAX - NR_CPUS diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 574ee58a304..3d1552d3c12 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -15,6 +15,7 @@ #include <linux/smp.h> #include <linux/file.h> #include <linux/poll.h> +#include <linux/slab.h> #include <linux/sysfs.h> #include <linux/dcache.h> #include <linux/percpu.h> @@ -1164,11 +1165,9 @@ void perf_event_task_sched_out(struct task_struct *task, struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event_context *next_ctx; struct perf_event_context *parent; - struct pt_regs *regs; int do_switch = 1; - regs = task_pt_regs(task); - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); if (likely(!ctx || !cpuctx->task_ctx)) return; @@ -2786,12 +2785,11 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return NULL; } -#ifdef CONFIG_EVENT_TRACING __weak void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) { } -#endif + /* * Output @@ -3378,15 +3376,23 @@ static void perf_event_task_output(struct perf_event *event, struct perf_task_event *task_event) { struct perf_output_handle handle; - int size; struct task_struct *task = task_event->task; - int ret; + unsigned long flags; + int size, ret; + + /* + * If this CPU attempts to acquire an rq lock held by a CPU spinning + * in perf_output_lock() from interrupt context, it's game over. + */ + local_irq_save(flags); size = task_event->event_id.header.size; ret = perf_output_begin(&handle, event, size, 0, 0); - if (ret) + if (ret) { + local_irq_restore(flags); return; + } task_event->event_id.pid = perf_event_pid(event, task); task_event->event_id.ppid = perf_event_pid(event, current); @@ -3397,6 +3403,7 @@ static void perf_event_task_output(struct perf_event *event, perf_output_put(&handle, task_event->event_id); perf_output_end(&handle); + local_irq_restore(flags); } static int perf_event_task_match(struct perf_event *event) @@ -4890,7 +4897,7 @@ err_fput_free_put_context: err_free_put_context: if (err < 0) - kfree(event); + free_event(event); err_put_context: if (err < 0) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 79aac93acf9..a5aff94e1f0 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -13,6 +13,7 @@ #include <linux/syscalls.h> #include <linux/err.h> #include <linux/acct.h> +#include <linux/slab.h> #define BITS_PER_PAGE (PAGE_SIZE*8) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 1a22dfd42df..bc7704b3a44 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1061,9 +1061,9 @@ static void check_thread_timers(struct task_struct *tsk, } } -static void stop_process_timers(struct task_struct *tsk) +static void stop_process_timers(struct signal_struct *sig) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = &sig->cputimer; unsigned long flags; if (!cputimer->running) @@ -1072,6 +1072,10 @@ static void stop_process_timers(struct task_struct *tsk) spin_lock_irqsave(&cputimer->lock, flags); cputimer->running = 0; spin_unlock_irqrestore(&cputimer->lock, flags); + + sig->cputime_expires.prof_exp = cputime_zero; + sig->cputime_expires.virt_exp = cputime_zero; + sig->cputime_expires.sched_exp = 0; } static u32 onecputick; @@ -1133,7 +1137,7 @@ static void check_process_timers(struct task_struct *tsk, list_empty(&timers[CPUCLOCK_VIRT]) && cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && list_empty(&timers[CPUCLOCK_SCHED])) { - stop_process_timers(tsk); + stop_process_timers(sig); return; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index da5288ec239..aa9e916da4d 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -22,6 +22,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> +#include <linux/gfp.h> #include <scsi/scsi_scan.h> #include <asm/suspend.h> diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c index 39ac698ef83..fdcad9ed5a7 100644 --- a/kernel/power/hibernate_nvs.c +++ b/kernel/power/hibernate_nvs.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/suspend.h> /* diff --git a/kernel/power/process.c b/kernel/power/process.c index 5ade1bdcf36..71ae29052ab 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -88,12 +88,11 @@ static int try_to_freeze_tasks(bool sig_only) printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " "(%d tasks refusing to freeze):\n", elapsed_csecs / 100, elapsed_csecs % 100, todo); - show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); if (freezing(p) && !freezer_should_skip(p)) - printk(KERN_ERR " %s\n", p->comm); + sched_show_task(p); cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); @@ -145,7 +144,7 @@ static void thaw_tasks(bool nosig_only) if (nosig_only && should_send_signal(p)) continue; - if (cgroup_frozen(p)) + if (cgroup_freezing_or_frozen(p)) continue; thaw_process(p); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 830cadecbdf..be861c26dda 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -26,6 +26,7 @@ #include <linux/console.h> #include <linux/highmem.h> #include <linux/list.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 44cce10b582..56e7dbb8b99 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -15,6 +15,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/syscalls.h> +#include <linux/gfp.h> #include "power.h" diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 1d575733d4e..66824d71983 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -23,6 +23,7 @@ #include <linux/swap.h> #include <linux/swapops.h> #include <linux/pm.h> +#include <linux/slab.h> #include "power.h" diff --git a/kernel/power/user.c b/kernel/power/user.c index 4d2289626a8..a8c96212bc1 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -420,7 +420,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, * User space encodes device types as two-byte values, * so we need to recode them */ - swdev = old_decode_dev(swap_area.dev); + swdev = new_decode_dev(swap_area.dev); if (swdev) { offset = swap_area.offset; data->swap = swap_type_of(swdev, offset, NULL); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f1125c1a632..49d808e833b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,7 @@ #include <linux/mutex.h> #include <linux/module.h> #include <linux/kernel_stat.h> +#include <linux/hardirq.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -66,6 +67,35 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map); int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +int debug_lockdep_rcu_enabled(void) +{ + return rcu_scheduler_active && debug_locks && + current->lockdep_recursion == 0; +} +EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); + +/** + * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * + * Check for bottom half being disabled, which covers both the + * CONFIG_PROVE_RCU and not cases. Note that if someone uses + * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) + * will show the situation. + * + * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. + */ +int rcu_read_lock_bh_held(void) +{ + if (!debug_lockdep_rcu_enabled()) + return 1; + return in_softirq(); +} +EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); + +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + /* * This function is invoked towards the end of the scheduler's initialization * process. Before this is called, the idle task might contain @@ -92,3 +122,14 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } + +#ifdef CONFIG_PROVE_RCU +/* + * wrapper function to avoid #include problems. + */ +int rcu_my_thread_group_empty(void) +{ + return thread_group_empty(current); +} +EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); +#endif /* #ifdef CONFIG_PROVE_RCU */ diff --git a/kernel/res_counter.c b/kernel/res_counter.c index bcdabf37c40..c7eaa37a768 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -10,7 +10,6 @@ #include <linux/types.h> #include <linux/parser.h> #include <linux/fs.h> -#include <linux/slab.h> #include <linux/res_counter.h> #include <linux/uaccess.h> #include <linux/mm.h> diff --git a/kernel/resource.c b/kernel/resource.c index 2d5be5d9bf5..9c358e26353 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -219,19 +219,34 @@ void release_child_resources(struct resource *r) } /** - * request_resource - request and reserve an I/O or memory resource + * request_resource_conflict - request and reserve an I/O or memory resource * @root: root resource descriptor * @new: resource descriptor desired by caller * - * Returns 0 for success, negative error code on error. + * Returns 0 for success, conflict resource on error. */ -int request_resource(struct resource *root, struct resource *new) +struct resource *request_resource_conflict(struct resource *root, struct resource *new) { struct resource *conflict; write_lock(&resource_lock); conflict = __request_resource(root, new); write_unlock(&resource_lock); + return conflict; +} + +/** + * request_resource - request and reserve an I/O or memory resource + * @root: root resource descriptor + * @new: resource descriptor desired by caller + * + * Returns 0 for success, negative error code on error. + */ +int request_resource(struct resource *root, struct resource *new) +{ + struct resource *conflict; + + conflict = request_resource_conflict(root, new); return conflict ? -EBUSY : 0; } @@ -474,25 +489,40 @@ static struct resource * __insert_resource(struct resource *parent, struct resou } /** - * insert_resource - Inserts a resource in the resource tree + * insert_resource_conflict - Inserts resource in the resource tree * @parent: parent of the new resource * @new: new resource to insert * - * Returns 0 on success, -EBUSY if the resource can't be inserted. + * Returns 0 on success, conflict resource if the resource can't be inserted. * - * This function is equivalent to request_resource when no conflict + * This function is equivalent to request_resource_conflict when no conflict * happens. If a conflict happens, and the conflicting resources * entirely fit within the range of the new resource, then the new * resource is inserted and the conflicting resources become children of * the new resource. */ -int insert_resource(struct resource *parent, struct resource *new) +struct resource *insert_resource_conflict(struct resource *parent, struct resource *new) { struct resource *conflict; write_lock(&resource_lock); conflict = __insert_resource(parent, new); write_unlock(&resource_lock); + return conflict; +} + +/** + * insert_resource - Inserts a resource in the resource tree + * @parent: parent of the new resource + * @new: new resource to insert + * + * Returns 0 on success, -EBUSY if the resource can't be inserted. + */ +int insert_resource(struct resource *parent, struct resource *new) +{ + struct resource *conflict; + + conflict = insert_resource_conflict(parent, new); return conflict ? -EBUSY : 0; } diff --git a/kernel/sched.c b/kernel/sched.c index 9ab3cd7858d..3c2a54f70ff 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@ #include <linux/debugfs.h> #include <linux/ctype.h> #include <linux/ftrace.h> +#include <linux/slab.h> #include <asm/tlb.h> #include <asm/irq_regs.h> @@ -322,6 +323,15 @@ static inline struct task_group *task_group(struct task_struct *p) /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { + /* + * Strictly speaking this rcu_read_lock() is not needed since the + * task_group is tied to the cgroup, which in turn can never go away + * as long as there are tasks attached to it. + * + * However since task_group() uses task_subsys_state() which is an + * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. + */ + rcu_read_lock(); #ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; p->se.parent = task_group(p)->se[cpu]; @@ -331,6 +341,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) p->rt.rt_rq = task_group(p)->rt_rq[cpu]; p->rt.parent = task_group(p)->rt_se[cpu]; #endif + rcu_read_unlock(); } #else @@ -2650,7 +2661,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) { unsigned long flags; struct rq *rq; - int cpu = get_cpu(); + int cpu __maybe_unused = get_cpu(); #ifdef CONFIG_SMP /* @@ -3779,7 +3790,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) * the mutex owner just released it and exited. */ if (probe_kernel_address(&owner->cpu, cpu)) - goto out; + return 0; #else cpu = owner->cpu; #endif @@ -3789,14 +3800,14 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) * the cpu field may no longer be valid. */ if (cpu >= nr_cpumask_bits) - goto out; + return 0; /* * We need to validate that we can do a * get_cpu() and that we have the percpu area. */ if (!cpu_online(cpu)) - goto out; + return 0; rq = cpu_rq(cpu); @@ -3815,7 +3826,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) cpu_relax(); } -out: + return 1; } #endif @@ -4902,7 +4913,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, int ret; cpumask_var_t mask; - if (len < cpumask_size()) + if ((len * BITS_PER_BYTE) < nr_cpu_ids) + return -EINVAL; + if (len & (sizeof(unsigned long)-1)) return -EINVAL; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) @@ -4910,10 +4923,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ret = sched_getaffinity(pid, mask); if (ret == 0) { - if (copy_to_user(user_mask_ptr, mask, cpumask_size())) + size_t retlen = min_t(size_t, len, cpumask_size()); + + if (copy_to_user(user_mask_ptr, mask, retlen)) ret = -EFAULT; else - ret = cpumask_size(); + ret = retlen; } free_cpumask_var(mask); @@ -5383,7 +5398,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) get_task_struct(mt); task_rq_unlock(rq, &flags); - wake_up_process(rq->migration_thread); + wake_up_process(mt); put_task_struct(mt); wait_for_completion(&req.done); tlb_migrate_finish(p->mm); diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index fccf9fbb0d7..e6871cb3fc8 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -27,6 +27,7 @@ * of the License. */ +#include <linux/gfp.h> #include "sched_cpupri.h" /* Convert between a 140 based task->prio, and our 102 based cpupri */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 67f95aada4b..19be00ba612 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -114,7 +114,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { char path[64]; + rcu_read_lock(); cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); + rcu_read_unlock(); SEQ_printf(m, " %s", path); } #endif @@ -518,8 +520,4 @@ void proc_sched_set_task(struct task_struct *p) p->se.nr_wakeups_idle = 0; p->sched_info.bkl_count = 0; #endif - p->se.sum_exec_runtime = 0; - p->se.prev_sum_exec_runtime = 0; - p->nvcsw = 0; - p->nivcsw = 0; } diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 7494bbf5a27..7d3f4fa9ef4 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -637,7 +637,7 @@ int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, goto cancelled; /* the timer holds a reference whilst it is pending */ - ret = work->ops->get_ref(work); + ret = slow_work_get_ref(work); if (ret < 0) goto cant_get_ref; diff --git a/kernel/slow-work.h b/kernel/slow-work.h index 321f3c59d73..a29ebd1ef41 100644 --- a/kernel/slow-work.h +++ b/kernel/slow-work.h @@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); */ static inline void slow_work_set_thread_pid(int id, pid_t pid) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG slow_work_pids[id] = pid; #endif } static inline void slow_work_mark_time(struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG work->mark = CURRENT_TIME; #endif } static inline void slow_work_begin_exec(int id, struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG slow_work_execs[id] = work; #endif } static inline void slow_work_end_exec(int id, struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG write_lock(&slow_work_execs_lock); slow_work_execs[id] = NULL; write_unlock(&slow_work_execs_lock); diff --git a/kernel/smp.c b/kernel/smp.c index 9867b6bfefc..3fc69733618 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/init.h> +#include <linux/gfp.h> #include <linux/smp.h> #include <linux/cpu.h> diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 0d4c7898ab8..4b493f67dcb 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -155,11 +155,11 @@ void softlockup_tick(void) * Wake up the high-prio watchdog task twice per * threshold timespan. */ - if (now > touch_ts + softlockup_thresh/2) + if (time_after(now - softlockup_thresh/2, touch_ts)) wake_up_process(per_cpu(softlockup_watchdog, this_cpu)); /* Warn about unreasonable delays: */ - if (now <= (touch_ts + softlockup_thresh)) + if (time_before_eq(now - softlockup_thresh, touch_ts)) return; per_cpu(softlockup_print_ts, this_cpu) = touch_ts; diff --git a/kernel/srcu.c b/kernel/srcu.c index bde4295774c..2980da3fd50 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -30,7 +30,6 @@ #include <linux/preempt.h> #include <linux/rcupdate.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/smp.h> #include <linux/srcu.h> diff --git a/kernel/sys.c b/kernel/sys.c index 8298878f4f7..7cb426a5896 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -36,6 +36,7 @@ #include <linux/personality.h> #include <linux/ptrace.h> #include <linux/fs_struct.h> +#include <linux/gfp.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -1117,7 +1118,7 @@ DECLARE_RWSEM(uts_sem); #ifdef COMPAT_UTS_MACHINE #define override_architecture(name) \ - (current->personality == PER_LINUX32 && \ + (personality(current->personality) == PER_LINUX32 && \ copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ sizeof(COMPAT_UTS_MACHINE))) #else diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 8cd50d8f9bd..59030570f5c 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -13,6 +13,7 @@ #include <linux/file.h> #include <linux/ctype.h> #include <linux/netdevice.h> +#include <linux/slab.h> #ifdef CONFIG_SYSCTL_SYSCALL diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 899ca51be5e..11281d5792b 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -22,6 +22,7 @@ #include <linux/delayacct.h> #include <linux/cpumask.h> #include <linux/percpu.h> +#include <linux/slab.h> #include <linux/cgroupstats.h> #include <linux/cgroup.h> #include <linux/fs.h> diff --git a/kernel/time.c b/kernel/time.c index 804798005d1..656dccfe1cb 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -35,7 +35,6 @@ #include <linux/syscalls.h> #include <linux/security.h> #include <linux/fs.h> -#include <linux/slab.h> #include <linux/math64.h> #include <linux/ptrace.h> diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 0a8a213016f..aada0e52680 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -22,6 +22,29 @@ #include "tick-internal.h" +/* Limit min_delta to a jiffie */ +#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) + +static int tick_increase_min_delta(struct clock_event_device *dev) +{ + /* Nothing to do if we already reached the limit */ + if (dev->min_delta_ns >= MIN_DELTA_LIMIT) + return -ETIME; + + if (dev->min_delta_ns < 5000) + dev->min_delta_ns = 5000; + else + dev->min_delta_ns += dev->min_delta_ns >> 1; + + if (dev->min_delta_ns > MIN_DELTA_LIMIT) + dev->min_delta_ns = MIN_DELTA_LIMIT; + + printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", + dev->name ? dev->name : "?", + (unsigned long long) dev->min_delta_ns); + return 0; +} + /** * tick_program_event internal worker function */ @@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, if (!ret || !force) return ret; + dev->retries++; /* - * We tried 2 times to program the device with the given - * min_delta_ns. If that's not working then we double it + * We tried 3 times to program the device with the given + * min_delta_ns. If that's not working then we increase it * and emit a warning. */ if (++i > 2) { /* Increase the min. delta and try again */ - if (!dev->min_delta_ns) - dev->min_delta_ns = 5000; - else - dev->min_delta_ns += dev->min_delta_ns >> 1; - - printk(KERN_WARNING - "CE: %s increasing min_delta_ns to %llu nsec\n", - dev->name ? dev->name : "?", - (unsigned long long) dev->min_delta_ns << 1); - + if (tick_increase_min_delta(dev)) { + /* + * Get out of the loop if min_delta_ns + * hit the limit already. That's + * better than staying here forever. + * + * We clear next_event so we have a + * chance that the box survives. + */ + printk(KERN_WARNING + "CE: Reprogramming failure. Giving up\n"); + dev->next_event.tv64 = KTIME_MAX; + return -ETIME; + } i = 0; } diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index 12f5c55090b..ac38fbb176c 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c @@ -19,6 +19,7 @@ #include <linux/timecompare.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/math64.h> /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 16736379a9c..39f6177fafa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -818,7 +818,8 @@ void update_wall_time(void) shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { offset = logarithmic_accumulation(offset, shift); - shift--; + if(offset < timekeeper.cycle_interval<<shift) + shift--; } /* correct the clock when NTP error is too big */ diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index bdfb8dd1050..1a4a7dd7877 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, " event_handler: "); print_name_offset(m, dev->event_handler); SEQ_printf(m, "\n"); + SEQ_printf(m, " retries: %lu\n", dev->retries); } static void timer_list_show_tickdevices(struct seq_file *m) @@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.5\n"); + SEQ_printf(m, "Timer List Version: v0.6\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); diff --git a/kernel/timer.c b/kernel/timer.c index c61a7949387..aeb6a54f277 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -39,6 +39,7 @@ #include <linux/kallsyms.h> #include <linux/perf_event.h> #include <linux/sched.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -880,6 +881,7 @@ int try_to_del_timer_sync(struct timer_list *timer) if (base->running_timer == timer) goto out; + timer_stats_timer_clear_start_info(timer); ret = 0; if (timer_pending(timer)) { detach_timer(timer, 1); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 07f945a9943..b3bc91a3f51 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -21,6 +21,7 @@ #include <linux/percpu.h> #include <linux/init.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/debugfs.h> #include <linux/smp_lock.h> #include <linux/time.h> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d9062f5cc0c..2404b59b309 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -24,6 +24,7 @@ #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/sysctl.h> +#include <linux/slab.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/hash.h> diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 9f4f565b01e..a22582a0616 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -9,7 +9,6 @@ #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/module.h> -#include <linux/slab.h> #define CREATE_TRACE_POINTS #include <trace/events/power.h> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 05a9f83b881..41ca394feb2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/list.h> @@ -207,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on); #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ +#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +# define RB_FORCE_8BYTE_ALIGNMENT 0 +# define RB_ARCH_ALIGNMENT RB_ALIGNMENT +#else +# define RB_FORCE_8BYTE_ALIGNMENT 1 +# define RB_ARCH_ALIGNMENT 8U +#endif + /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX @@ -1201,18 +1210,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) for (i = 0; i < nr_pages; i++) { if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) - return; + goto out; p = cpu_buffer->pages->next; bpage = list_entry(p, struct buffer_page, list); list_del_init(&bpage->list); free_buffer_page(bpage); } if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) - return; + goto out; rb_reset_cpu(cpu_buffer); rb_check_pages(cpu_buffer); +out: spin_unlock_irq(&cpu_buffer->reader_lock); } @@ -1229,7 +1239,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, for (i = 0; i < nr_pages; i++) { if (RB_WARN_ON(cpu_buffer, list_empty(pages))) - return; + goto out; p = pages->next; bpage = list_entry(p, struct buffer_page, list); list_del_init(&bpage->list); @@ -1238,6 +1248,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, rb_reset_cpu(cpu_buffer); rb_check_pages(cpu_buffer); +out: spin_unlock_irq(&cpu_buffer->reader_lock); } @@ -1547,7 +1558,7 @@ rb_update_event(struct ring_buffer_event *event, case 0: length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA) + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) event->array[0] = length; else event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); @@ -1722,11 +1733,11 @@ static unsigned rb_calculate_event_length(unsigned length) if (!length) length = 1; - if (length > RB_MAX_SMALL_DATA) + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) length += sizeof(event.array[0]); length += RB_EVNT_HDR_SIZE; - length = ALIGN(length, RB_ALIGNMENT); + length = ALIGN(length, RB_ARCH_ALIGNMENT); return length; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3ec2ee6f656..44f916a0406 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -33,10 +33,10 @@ #include <linux/kdebug.h> #include <linux/string.h> #include <linux/rwsem.h> +#include <linux/slab.h> #include <linux/ctype.h> #include <linux/init.h> #include <linux/poll.h> -#include <linux/gfp.h> #include <linux/fs.h> #include "trace.h" diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 6fbfb8f417b..9d589d8dcd1 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -84,7 +84,7 @@ u64 notrace trace_clock_global(void) int this_cpu; u64 now; - raw_local_irq_save(flags); + local_irq_save(flags); this_cpu = raw_smp_processor_id(); now = cpu_clock(this_cpu); @@ -110,7 +110,7 @@ u64 notrace trace_clock_global(void) arch_spin_unlock(&trace_clock_struct.lock); out: - raw_local_irq_restore(flags); + local_irq_restore(flags); return now; } diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 81f691eb3a3..0565bb42566 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -17,7 +17,12 @@ EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); static char *perf_trace_buf; static char *perf_trace_buf_nmi; -typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ; +/* + * Force it to be aligned to unsigned long to avoid misaligned accesses + * suprises + */ +typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) + perf_trace_t; /* Count the events in use (per event id, not per instance) */ static int total_ref_count; @@ -130,6 +135,8 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, char *trace_buf, *raw_data; int pc, cpu; + BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); + pc = preempt_count(); /* Protect the per cpu buffer, begin the rcu read side */ @@ -152,7 +159,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); entry = (struct trace_entry *)raw_data; tracing_generic_entry_update(entry, *irq_flags, pc); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index beab8bf2f31..c697c704334 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -15,6 +15,7 @@ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> +#include <linux/slab.h> #include <linux/delay.h> #include <asm/setup.h> diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 4615f62a04f..88c0b6dbd7f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -22,6 +22,7 @@ #include <linux/ctype.h> #include <linux/mutex.h> #include <linux/perf_event.h> +#include <linux/slab.h> #include "trace.h" #include "trace_output.h" diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e6989d9b44d..9aed1a5cf55 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -9,6 +9,7 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/ftrace.h> +#include <linux/slab.h> #include <linux/fs.h> #include "trace.h" diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 94103cdcf9d..d59cd687947 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -23,6 +23,7 @@ #include <linux/debugfs.h> #include <linux/ftrace.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/fs.h> #include "trace_output.h" diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 0acd834659e..017fa376505 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/mmiotrace.h> #include <linux/pci.h> +#include <linux/slab.h> #include <linux/time.h> #include <asm/atomic.h> diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 280fea470d6..81003b4d617 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -3,6 +3,7 @@ #include <linux/stringify.h> #include <linux/kthread.h> #include <linux/delay.h> +#include <linux/slab.h> static inline int trace_valid_entry(struct trace_entry *entry) { diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index a4bb239eb98..96cffb269e7 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -10,6 +10,7 @@ #include <linux/list.h> +#include <linux/slab.h> #include <linux/rbtree.h> #include <linux/debugfs.h> #include "trace_stat.h" diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 33c2a5b769d..4d6d711717f 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,5 +1,6 @@ #include <trace/syscall.h> #include <trace/events/syscalls.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/ftrace.h> #include <linux/perf_event.h> diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 40cafb07dff..cc2d2faa7d9 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -9,6 +9,7 @@ #include <trace/events/workqueue.h> #include <linux/list.h> #include <linux/percpu.h> +#include <linux/slab.h> #include <linux/kref.h> #include "trace_stat.h" #include "trace.h" diff --git a/kernel/workqueue.c b/kernel/workqueue.c index dee48658805..5bfb213984b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -774,7 +774,7 @@ void flush_delayed_work(struct delayed_work *dwork) { if (del_timer_sync(&dwork->timer)) { struct cpu_workqueue_struct *cwq; - cwq = wq_per_cpu(keventd_wq, get_cpu()); + cwq = wq_per_cpu(get_wq_data(&dwork->work)->wq, get_cpu()); __queue_work(cwq, &dwork->work); put_cpu(); } |