From f1aaf26224bee779012aab136e5373ce3487982c Mon Sep 17 00:00:00 2001 From: Imre Palik Date: Mon, 23 Feb 2015 15:37:59 -0500 Subject: audit: move the tree pruning to a dedicated thread When file auditing is enabled, during a low memory situation, a memory allocation with __GFP_FS can lead to pruning the inode cache. Which can, in turn lead to audit_tree_freeing_mark() being called. This can call audit_schedule_prune(), that tries to fork a pruning thread, and waits until the thread is created. But forking needs memory, and the memory allocations there are done with __GFP_FS. So we are waiting merrily for some __GFP_FS memory allocations to complete, while holding some filesystem locks. This can take a while ... This patch creates a single thread for pruning the tree from audit_add_tree_rule(), and thus avoids the deadlock that the on-demand thread creation can cause. Reported-by: Matt Wilson Cc: Matt Wilson Signed-off-by: Imre Palik Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit_tree.c | 88 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 80f29e015570..415072c8e875 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -37,6 +37,7 @@ struct audit_chunk { static LIST_HEAD(tree_list); static LIST_HEAD(prune_list); +static struct task_struct *prune_thread; /* * One struct chunk is attached to each inode of interest. @@ -651,6 +652,57 @@ static int tag_mount(struct vfsmount *mnt, void *arg) return tag_chunk(mnt->mnt_root->d_inode, arg); } +/* + * That gets run when evict_chunk() ends up needing to kill audit_tree. + * Runs from a separate thread. + */ +static int prune_tree_thread(void *unused) +{ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&prune_list)) + schedule(); + __set_current_state(TASK_RUNNING); + + mutex_lock(&audit_cmd_mutex); + mutex_lock(&audit_filter_mutex); + + while (!list_empty(&prune_list)) { + struct audit_tree *victim; + + victim = list_entry(prune_list.next, + struct audit_tree, list); + list_del_init(&victim->list); + + mutex_unlock(&audit_filter_mutex); + + prune_one(victim); + + mutex_lock(&audit_filter_mutex); + } + + mutex_unlock(&audit_filter_mutex); + mutex_unlock(&audit_cmd_mutex); + } + return 0; +} + +static int audit_launch_prune(void) +{ + if (prune_thread) + return 0; + prune_thread = kthread_create(prune_tree_thread, NULL, + "audit_prune_tree"); + if (IS_ERR(prune_thread)) { + pr_err("cannot start thread audit_prune_tree"); + prune_thread = NULL; + return -ENOMEM; + } else { + wake_up_process(prune_thread); + return 0; + } +} + /* called with audit_filter_mutex */ int audit_add_tree_rule(struct audit_krule *rule) { @@ -674,6 +726,12 @@ int audit_add_tree_rule(struct audit_krule *rule) /* do not set rule->tree yet */ mutex_unlock(&audit_filter_mutex); + if (unlikely(!prune_thread)) { + err = audit_launch_prune(); + if (err) + goto Err; + } + err = kern_path(tree->pathname, 0, &path); if (err) goto Err; @@ -811,36 +869,10 @@ int audit_tag_tree(char *old, char *new) return failed; } -/* - * That gets run when evict_chunk() ends up needing to kill audit_tree. - * Runs from a separate thread. 
- */ -static int prune_tree_thread(void *unused) -{ - mutex_lock(&audit_cmd_mutex); - mutex_lock(&audit_filter_mutex); - - while (!list_empty(&prune_list)) { - struct audit_tree *victim; - - victim = list_entry(prune_list.next, struct audit_tree, list); - list_del_init(&victim->list); - - mutex_unlock(&audit_filter_mutex); - - prune_one(victim); - - mutex_lock(&audit_filter_mutex); - } - - mutex_unlock(&audit_filter_mutex); - mutex_unlock(&audit_cmd_mutex); - return 0; -} static void audit_schedule_prune(void) { - kthread_run(prune_tree_thread, NULL, "audit_prune_tree"); + wake_up_process(prune_thread); } /* @@ -907,9 +939,9 @@ static void evict_chunk(struct audit_chunk *chunk) for (n = 0; n < chunk->count; n++) list_del_init(&chunk->owners[n].list); spin_unlock(&hash_lock); + mutex_unlock(&audit_filter_mutex); if (need_prune) audit_schedule_prune(); - mutex_unlock(&audit_filter_mutex); } static int audit_tree_handle_event(struct fsnotify_group *group, -- cgit v1.2.3 From a77ed4e5689627b0e9ac0a3532521b31c6daa99c Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Mon, 23 Feb 2015 15:37:59 -0500 Subject: audit: don't lose set wait time on first successful call to audit_log_start() Copy the set wait time to a working value to avoid losing the set value if the queue overflows. Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index c7e097a0d7af..2125cc40e921 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -107,6 +107,7 @@ static u32 audit_rate_limit; * When set to zero, this means unlimited. */ static u32 audit_backlog_limit = 64; #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) +static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME; static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; static u32 audit_backlog_wait_overflow = 0; @@ -338,7 +339,7 @@ static int audit_set_backlog_limit(u32 limit) static int audit_set_backlog_wait_time(u32 timeout) { return audit_do_config_change("audit_backlog_wait_time", - &audit_backlog_wait_time, timeout); + &audit_backlog_wait_time_master, timeout); } static int audit_set_enabled(u32 state) @@ -843,7 +844,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.lost = atomic_read(&audit_lost); s.backlog = skb_queue_len(&audit_skb_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; - s.backlog_wait_time = audit_backlog_wait_time; + s.backlog_wait_time = audit_backlog_wait_time_master; audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } @@ -1394,7 +1395,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, return NULL; } - audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; + audit_backlog_wait_time = audit_backlog_wait_time_master; ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { -- cgit v1.2.3 From efef73a1a206c4b5e37e5c63a361243ed1603eff Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Mon, 23 Feb 2015 15:38:00 -0500 Subject: audit: don't reset working wait time accidentally with auditd During a queue overflow condition while we are waiting for auditd to drain the queue to make room for regular messages, we don't want a successful auditd that has bypassed the queue check to reset the backlog wait time. 
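To make the interplay of the two wait-time values concrete, here is a stand-alone sketch in plain C (illustrative only, not the kernel code) of the pattern these two audit patches establish: user space sets a master value, and the logging path consumes a working copy that is refreshed only by callers that do not bypass the queue check. The decrement below stands in for sleeping off part of the wait budget.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: the names mirror the patches, but this is not kernel code. */
static unsigned int backlog_wait_time_master = 60;   /* value set via configuration */
static unsigned int backlog_wait_time = 60;          /* working copy consumed while waiting */

/* Configuration path: only the master value is ever written. */
static void set_backlog_wait_time(unsigned int timeout)
{
        backlog_wait_time_master = timeout;
}

/*
 * Logging path: a caller with "reserve" rights (auditd) bypasses the backlog
 * check, so it must not refresh the working value that ordinary waiters are
 * still consuming.
 */
static bool log_start(bool reserve, unsigned int backlog, unsigned int limit)
{
        if (!reserve && backlog > limit) {
                if (backlog_wait_time == 0)
                        return false;   /* give up; the master value is untouched */
                backlog_wait_time--;    /* stand-in for sleeping one tick */
                return false;
        }

        if (!reserve)
                backlog_wait_time = backlog_wait_time_master;
        return true;
}

int main(void)
{
        set_backlog_wait_time(30);
        /* auditd logging during overflow does not reset the budget others wait on */
        log_start(true, 100, 64);
        printf("working=%u master=%u\n", backlog_wait_time, backlog_wait_time_master);
        return 0;
}
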
Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 2125cc40e921..59d4ceb3b2a6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1395,7 +1395,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, return NULL; } - audit_backlog_wait_time = audit_backlog_wait_time_master; + if (!reserve) + audit_backlog_wait_time = audit_backlog_wait_time_master; ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { -- cgit v1.2.3 From 5985de6754a6fc22fbf7e4b6033d4bfa0240a63a Mon Sep 17 00:00:00 2001 From: Ameen Ali Date: Mon, 23 Feb 2015 15:38:00 -0500 Subject: audit: code clean up Fixed a coding style issue (unnecessary parentheses , unnecessary braces) Signed-off-by: Ameen-Ali [PM: tweaked subject line] Signed-off-by: Paul Moore --- kernel/audit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 59d4ceb3b2a6..d219bb03a364 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -673,7 +673,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) case AUDIT_MAKE_EQUIV: /* Only support auditd and auditctl in initial pid namespace * for now. */ - if ((task_active_pid_ns(current) != &init_pid_ns)) + if (task_active_pid_ns(current) != &init_pid_ns) return -EPERM; if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) @@ -1770,7 +1770,7 @@ void audit_log_name(struct audit_context *context, struct audit_names *n, } else audit_log_format(ab, " name=(null)"); - if (n->ino != (unsigned long)-1) { + if (n->ino != (unsigned long)-1) audit_log_format(ab, " inode=%lu" " dev=%02x:%02x mode=%#ho" " ouid=%u ogid=%u rdev=%02x:%02x", @@ -1782,7 +1782,6 @@ void audit_log_name(struct audit_context *context, struct audit_names *n, from_kgid(&init_user_ns, n->gid), MAJOR(n->rdev), MINOR(n->rdev)); - } if (n->osid != 0) { char *ctx = NULL; u32 len; -- cgit v1.2.3 From 4766b199ef9e1ca6316ee4f8f9d80c2ba1ed0290 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 22 Feb 2015 18:20:00 -0800 Subject: audit: consolidate handling of mm->exe_file This patch adds a audit_log_d_path_exe() helper function to share how we handle auditing of the exe_file's path. Used by both audit and auditsc. No functionality is changed. 
Signed-off-by: Davidlohr Bueso [PM: tweaked subject line] Signed-off-by: Paul Moore --- kernel/audit.c | 23 +++++++++++++++-------- kernel/audit.h | 3 +++ kernel/auditsc.c | 9 +-------- 3 files changed, 19 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index d219bb03a364..684b51d612a3 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1848,11 +1848,24 @@ error_path: } EXPORT_SYMBOL(audit_log_task_context); +void audit_log_d_path_exe(struct audit_buffer *ab, + struct mm_struct *mm) +{ + if (!mm) { + audit_log_format(ab, " exe=(null)"); + return; + } + + down_read(&mm->mmap_sem); + if (mm->exe_file) + audit_log_d_path(ab, " exe=", &mm->exe_file->f_path); + up_read(&mm->mmap_sem); +} + void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; char comm[sizeof(tsk->comm)]; - struct mm_struct *mm = tsk->mm; char *tty; if (!ab) @@ -1888,13 +1901,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, tsk)); - if (mm) { - down_read(&mm->mmap_sem); - if (mm->exe_file) - audit_log_d_path(ab, " exe=", &mm->exe_file->f_path); - up_read(&mm->mmap_sem); - } else - audit_log_format(ab, " exe=(null)"); + audit_log_d_path_exe(ab, tsk->mm); audit_log_task_context(ab); } EXPORT_SYMBOL(audit_log_task_info); diff --git a/kernel/audit.h b/kernel/audit.h index 3cdffad5a1d9..28067c50fd04 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -270,6 +270,9 @@ extern struct list_head audit_filter_list[]; extern struct audit_entry *audit_dupe_rule(struct audit_krule *old); +extern void audit_log_d_path_exe(struct audit_buffer *ab, + struct mm_struct *mm); + /* audit watch functions */ #ifdef CONFIG_AUDIT_WATCH extern void audit_put_watch(struct audit_watch *watch); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 793e9e98f7f8..4b89f7f95d84 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2460,7 +2460,6 @@ static void audit_log_task(struct audit_buffer *ab) kuid_t auid, uid; kgid_t gid; unsigned int sessionid; - struct mm_struct *mm = current->mm; char comm[sizeof(current->comm)]; auid = audit_get_loginuid(current); @@ -2475,13 +2474,7 @@ static void audit_log_task(struct audit_buffer *ab) audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", task_pid_nr(current)); audit_log_untrustedstring(ab, get_task_comm(comm, current)); - if (mm) { - down_read(&mm->mmap_sem); - if (mm->exe_file) - audit_log_d_path(ab, " exe=", &mm->exe_file->f_path); - up_read(&mm->mmap_sem); - } else - audit_log_format(ab, " exe=(null)"); + audit_log_d_path_exe(ab, current->mm); } /** -- cgit v1.2.3 From 5b28255278dd7e594c8dde317c2498b7dcbf900d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 22 Feb 2015 18:20:09 -0800 Subject: audit: reduce mmap_sem hold for mm->exe_file The mm->exe_file is currently serialized with mmap_sem (shared) in order to both safely (1) read the file and (2) audit it via audit_log_d_path(). Good users will, on the other hand, make use of the more standard get_mm_exe_file(), requiring only holding the mmap_sem to read the value, and relying on reference counting to make sure that the exe file won't dissapear underneath us. Additionally, upon NULL return of get_mm_exe_file, we also call audit_log_format(ab, " exe=(null)"). 
Signed-off-by: Davidlohr Bueso [PM: tweaked subject line] Signed-off-by: Paul Moore --- kernel/audit.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 684b51d612a3..52ee8eee0e07 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -43,6 +43,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1851,15 +1852,20 @@ EXPORT_SYMBOL(audit_log_task_context); void audit_log_d_path_exe(struct audit_buffer *ab, struct mm_struct *mm) { - if (!mm) { - audit_log_format(ab, " exe=(null)"); - return; - } + struct file *exe_file; + + if (!mm) + goto out_null; - down_read(&mm->mmap_sem); - if (mm->exe_file) - audit_log_d_path(ab, " exe=", &mm->exe_file->f_path); - up_read(&mm->mmap_sem); + exe_file = get_mm_exe_file(mm); + if (!exe_file) + goto out_null; + + audit_log_d_path(ab, " exe=", &exe_file->f_path); + fput(exe_file); + return; +out_null: + audit_log_format(ab, " exe=(null)"); } void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) -- cgit v1.2.3 From 32a158325acf12842764b1681f53903673f2f22e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 27 Feb 2015 11:25:56 -0800 Subject: clockevents: export clockevents_unbind_device instead of clockevents_unbind It looks like clockevents_unbind is being exported by mistake as: - it is static; - it is not listed in include/linux/clockchips.h; - EXPORT_SYMBOL_GPL(clockevents_unbind) follows clockevents_unbind_device() implementation. I think clockevents_unbind_device should be exported instead. This is going to be used to teardown Hyper-V clockevent devices on module unload. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- kernel/time/clockevents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 55449909f114..888ecc114ddc 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -371,7 +371,7 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) mutex_unlock(&clockevents_mutex); return ret; } -EXPORT_SYMBOL_GPL(clockevents_unbind); +EXPORT_SYMBOL_GPL(clockevents_unbind_device); /** * clockevents_register_device - register a clock event device -- cgit v1.2.3 From f427c990e2d0fd30336b0c252aa7e38e4cffdea2 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 1 Mar 2015 11:05:47 -0500 Subject: console: Preserve index after console setup() Before register_console() calls the setup() method of the matched console, the registering console index is already equal to the index from the console command line; ie. newcon->index == c->index. This change is also required to support extensible console matching; (the command line index may have no relation to the console index assigned by the console-defined match() function). 
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 01cfd69c54c6..d261a7e5f51a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2479,7 +2479,6 @@ void register_console(struct console *newcon) newcon->setup(newcon, console_cmdline[i].options) != 0) break; newcon->flags |= CON_ENABLED; - newcon->index = c->index; if (i == selected_console) { newcon->flags |= CON_CONSDEV; preferred_console = selected_console; -- cgit v1.2.3 From 724e7bfcc566375158219c1454b4b6fc416b2c4a Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Wed, 11 Mar 2015 14:08:19 -0400 Subject: audit: Remove condition which always evaluates to false MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 3e1d0bb6224f019893d1c498cc3327559d183674 ("audit: Convert int limit uses to u32"), by converting an int to u32, few conditions will always evaluate to false. These warnings were emitted during compilation: kernel/audit.c: In function ‘audit_set_enabled’: kernel/audit.c:347:2: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (state < AUDIT_OFF || state > AUDIT_LOCKED) ^ kernel/audit.c: In function ‘audit_receive_msg’: kernel/audit.c:880:9: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (s.backlog_wait_time < 0 || The following patch removes those unnecessary conditions. Signed-off-by: Pranith Kumar Signed-off-by: Paul Moore --- kernel/audit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 52ee8eee0e07..d5a1220c8620 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -346,7 +346,7 @@ static int audit_set_backlog_wait_time(u32 timeout) static int audit_set_enabled(u32 state) { int rc; - if (state < AUDIT_OFF || state > AUDIT_LOCKED) + if (state > AUDIT_LOCKED) return -EINVAL; rc = audit_do_config_change("audit_enabled", &audit_enabled, state); @@ -888,8 +888,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (s.mask & AUDIT_STATUS_BACKLOG_WAIT_TIME) { if (sizeof(s) > (size_t)nlh->nlmsg_len) return -EINVAL; - if (s.backlog_wait_time < 0 || - s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME) + if (s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME) return -EINVAL; err = audit_set_backlog_wait_time(s.backlog_wait_time); if (err < 0) -- cgit v1.2.3 From 7b63c3ab9b10b8a9cdc6b3a9e0e3d6a8a5d28970 Mon Sep 17 00:00:00 2001 From: Yannick Guerrini Date: Tue, 24 Mar 2015 12:31:40 +1030 Subject: kernel/module.c: fix typos in message about unused symbols Fix typos in pr_warn message about unused symbols Signed-off-by: Yannick Guerrini Signed-off-by: Rusty Russell --- kernel/module.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index b3d634ed06c9..3ab942f78760 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -387,9 +387,9 @@ static bool check_symbol(const struct symsearch *syms, pr_warn("Symbol %s is marked as UNUSED, however this module is " "using it.\n", fsa->name); pr_warn("This symbol will go away in the future.\n"); - pr_warn("Please evalute if this is the right api to use and if " - "it really is, submit a report the linux kernel " - "mailinglist together with submitting your code for " + pr_warn("Please evaluate if this is the right api to use and " + 
"if it really is, submit a report to the linux kernel " + "mailing list together with submitting your code for " "inclusion.\n"); } #endif -- cgit v1.2.3 From cc9e605dc6cb2e32fedae4ac2f61ad3b5f8d623d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 24 Mar 2015 12:31:40 +1030 Subject: module: do not print allocation-fail warning on bogus user buffer size init_module(2) passes user-specified buffer length directly to vmalloc(). It makes warn_alloc_failed() to print out a lot of info into dmesg if user specified insane size, like -1. Let's silence the warning. It doesn't add much value to -ENOMEM return code. Without the patch the syscall is prohibitive noisy for testing with trinity. Signed-off-by: Kirill A. Shutemov Cc: Dave Jones Cc: Sasha Levin Signed-off-by: Rusty Russell --- kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 3ab942f78760..65bd206e04a9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2494,7 +2494,8 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, return err; /* Suck in entire file: we'll want most of it. */ - info->hdr = vmalloc(info->len); + info->hdr = __vmalloc(info->len, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL); if (!info->hdr) return -ENOMEM; -- cgit v1.2.3 From c7cef0a84912cab3c9df8949b034e4aa62982ec9 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 9 Mar 2015 16:27:12 -0400 Subject: console: Add extensible console matching Add match() method to struct console which allows the console to perform console command line matching instead of (or in addition to) default console matching (ie., by fixed name and index). The match() method returns 0 to indicate a successful match; normal console matching occurs if no match() method is defined or the match() method returns non-zero. The match() method is expected to set the console index if required. Re-implement earlycon-to-console-handoff with direct matching of "console=uart|uart8250,..." to the 8250 ttyS console. Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 64 +++++++++++++++++++++++++++--------- drivers/tty/serial/8250/8250_early.c | 23 ------------- include/linux/console.h | 3 +- include/linux/serial_8250.h | 2 -- kernel/printk/printk.c | 52 ++++++++++------------------- 5 files changed, 67 insertions(+), 77 deletions(-) (limited to 'kernel') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index bd06ab790c64..924ee1e13828 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -3322,9 +3322,54 @@ static int serial8250_console_setup(struct console *co, char *options) return uart_set_options(port, co, baud, parity, bits, flow); } -static int serial8250_console_early_setup(void) +/** + * serial8250_console_match - non-standard console matching + * @co: registering console + * @name: name from console command line + * @idx: index from console command line + * @options: ptr to option string from console command line + * + * Only attempts to match console command lines of the form: + * console=uart<>,io|mmio|mmio32,, + * console=uart<>,,options + * This form is used to register an initial earlycon boot console and + * replace it with the serial8250_console at 8250 driver init. 
+ * + * Performs console setup for a match (as required by interface) + * + * Returns 0 if console matches; otherwise non-zero to use default matching + */ +static int serial8250_console_match(struct console *co, char *name, int idx, + char *options) { - return serial8250_find_port_for_earlycon(); + char match[] = "uart"; /* 8250-specific earlycon name */ + unsigned char iotype; + unsigned long addr; + int i; + + if (strncmp(name, match, 4) != 0) + return -ENODEV; + + if (uart_parse_earlycon(options, &iotype, &addr, &options)) + return -ENODEV; + + /* try to match the port specified on the command line */ + for (i = 0; i < nr_uarts; i++) { + struct uart_port *port = &serial8250_ports[i].port; + + if (port->iotype != iotype) + continue; + if ((iotype == UPIO_MEM || iotype == UPIO_MEM32) && + (port->mapbase != addr)) + continue; + if (iotype == UPIO_PORT && port->iobase != addr) + continue; + + co->index = i; + return serial8250_console_setup(co, options); + } + + return -ENODEV; } static struct console serial8250_console = { @@ -3332,7 +3377,7 @@ static struct console serial8250_console = { .write = serial8250_console_write, .device = uart_console_device, .setup = serial8250_console_setup, - .early_setup = serial8250_console_early_setup, + .match = serial8250_console_match, .flags = CON_PRINTBUFFER | CON_ANYTIME, .index = -1, .data = &serial8250_reg, @@ -3346,19 +3391,6 @@ static int __init serial8250_console_init(void) } console_initcall(serial8250_console_init); -int serial8250_find_port(struct uart_port *p) -{ - int line; - struct uart_port *port; - - for (line = 0; line < nr_uarts; line++) { - port = &serial8250_ports[line].port; - if (uart_match_port(p, port)) - return line; - } - return -ENODEV; -} - #define SERIAL8250_CONSOLE &serial8250_console #else #define SERIAL8250_CONSOLE NULL diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index c31a22b4f845..49bca65057e6 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -173,26 +173,3 @@ int __init setup_early_serial8250_console(char *cmdline) return setup_earlycon(cmdline, match, early_serial8250_setup); } - -int serial8250_find_port_for_earlycon(void) -{ - struct earlycon_device *device = early_device; - struct uart_port *port = device ? 
&device->port : NULL; - int line; - int ret; - - if (!port || (!port->membase && !port->iobase)) - return -ENODEV; - - line = serial8250_find_port(port); - if (line < 0) - return -ENODEV; - - ret = update_console_cmdline("uart", 8250, - "ttyS", line, device->options); - if (ret < 0) - ret = update_console_cmdline("uart", 0, - "ttyS", line, device->options); - - return ret; -} diff --git a/include/linux/console.h b/include/linux/console.h index 7571a16bd653..9f50fb413c11 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -123,7 +123,7 @@ struct console { struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); int (*setup)(struct console *, char *); - int (*early_setup)(void); + int (*match)(struct console *, char *name, int idx, char *options); short flags; short index; int cflag; @@ -141,7 +141,6 @@ extern int console_set_on_cmdline; extern struct console *early_console; extern int add_preferred_console(char *name, int idx, char *options); -extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); extern int unregister_console(struct console *); extern struct console *console_drivers; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index a8efa235b7c1..f26ae7fa30ae 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -118,8 +118,6 @@ void serial8250_resume_port(int line); extern int early_serial_setup(struct uart_port *port); -extern int serial8250_find_port(struct uart_port *p); -extern int serial8250_find_port_for_earlycon(void); extern unsigned int serial8250_early_in(struct uart_port *port, int offset); extern void serial8250_early_out(struct uart_port *port, int offset, int value); extern int setup_early_serial8250_console(char *cmdline); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 26f899809539..dda959221086 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2017,24 +2017,6 @@ int add_preferred_console(char *name, int idx, char *options) return __add_preferred_console(name, idx, options, NULL); } -int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) -{ - struct console_cmdline *c; - int i; - - for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; - i++, c++) - if (strcmp(c->name, name) == 0 && c->index == idx) { - strlcpy(c->name, name_new, sizeof(c->name)); - c->options = options; - c->index = idx_new; - return i; - } - /* not found */ - return -1; -} - bool console_suspend_enabled = true; EXPORT_SYMBOL(console_suspend_enabled); @@ -2436,9 +2418,6 @@ void register_console(struct console *newcon) if (preferred_console < 0 || bcon || !console_drivers) preferred_console = selected_console; - if (newcon->early_setup) - newcon->early_setup(); - /* * See if we want to use this console driver. 
If we * didn't select a console we take the first one @@ -2464,21 +2443,26 @@ void register_console(struct console *newcon) for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { - BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); - if (strcmp(c->name, newcon->name) != 0) - continue; - if (newcon->index >= 0 && - newcon->index != c->index) - continue; - if (newcon->index < 0) - newcon->index = c->index; + if (!newcon->match || + newcon->match(newcon, c->name, c->index, c->options) != 0) { + /* default matching */ + BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); + if (strcmp(c->name, newcon->name) != 0) + continue; + if (newcon->index >= 0 && + newcon->index != c->index) + continue; + if (newcon->index < 0) + newcon->index = c->index; - if (_braille_register_console(newcon, c)) - return; + if (_braille_register_console(newcon, c)) + return; + + if (newcon->setup && + newcon->setup(newcon, c->options) != 0) + break; + } - if (newcon->setup && - newcon->setup(newcon, console_cmdline[i].options) != 0) - break; newcon->flags |= CON_ENABLED; if (i == selected_console) { newcon->flags |= CON_CONSDEV; -- cgit v1.2.3 From 8d7dc9283f399e1fda4e48a1c453f689326d9396 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Apr 2015 19:33:59 -0700 Subject: rcu: Control grace-period delays directly from value In a misguided attempt to avoid an #ifdef, the use of the gp_init_delay module parameter was conditioned on the corresponding RCU_TORTURE_TEST_SLOW_INIT Kconfig variable, using IS_ENABLED() at the point of use in the code. This meant that the compiler always saw the delay, which meant that RCU_TORTURE_TEST_SLOW_INIT_DELAY had to be unconditionally defined. This in turn caused "make oldconfig" to ask pointless questions about the value of RCU_TORTURE_TEST_SLOW_INIT_DELAY in cases where it was not even used. This commit avoids these pointless questions by defining gp_init_delay under #ifdef. In one branch, gp_init_delay is initialized to RCU_TORTURE_TEST_SLOW_INIT_DELAY and is also a module parameter (thus allowing boot-time modification), and in the other branch gp_init_delay is a const variable initialized by default to zero. This approach also simplifies the code at the delay point by eliminating the IS_DEFINED(). Because gp_init_delay is constant zero in the no-delay case intended for production use, the "gp_init_delay > 0" check causes the delay to become dead code, as desired in this case. In addition, this commit replaces magic constant "10" with the preprocessor variable PER_RCU_NODE_PERIOD, which controls the number of grace periods that are allowed to elapse at full speed before a delay is inserted. Reported-by: Linus Torvalds Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 16 +++++++++------- lib/Kconfig.debug | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 233165da782f..8cf7304b2867 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; module_param(kthread_prio, int, 0644); -/* Delay in jiffies for grace-period initialization delays. */ -static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) - ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY - : 0; +/* Delay in jiffies for grace-period initialization delays, debug only. 
*/ +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT +static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +static const int gp_init_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ /* * Track the rcutorture test sequence number and the update version @@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); ACCESS_ONCE(rsp->gp_activity) = jiffies; - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && - gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * 10))) + if (gp_init_delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) schedule_timeout_uninterruptible(gp_init_delay); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1ad74c0df01f..5f5ff7d7e5eb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1268,6 +1268,7 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY int "How much to slow down RCU grace-period initialization" range 0 5 default 3 + depends on RCU_TORTURE_TEST_SLOW_INIT help This option specifies the number of jiffies to wait between each rcu_node structure initialization. -- cgit v1.2.3 From b9cc4489c68de59f7a38ef4e02a9829465a6a544 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 15 Apr 2015 13:23:48 +0930 Subject: params: handle quotes properly for values not of form foo="bar". When starting kernel with arguments like: init=/bin/sh -c "echo arguments" the trailing double quote is not removed which results in following command being executed: /bin/sh -c 'echo arguments"' Reported-by: Arthur Gautier Signed-off-by: Rusty Russell --- kernel/params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index 728e05b167de..a22d6a759b1a 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -173,9 +173,9 @@ static char *next_arg(char *args, char **param, char **val) if (args[i-1] == '"') args[i-1] = '\0'; } - if (quoted && args[i-1] == '"') - args[i-1] = '\0'; } + if (quoted && args[i-1] == '"') + args[i-1] = '\0'; if (args[i]) { args[i] = '\0'; -- cgit v1.2.3 From 3b362157b2162719145d4f3d4f534357d89b45ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 22:26:21 +0000 Subject: VFS: audit: d_backing_inode() annotations Signed-off-by: David Howells Signed-off-by: Al Viro --- kernel/audit.c | 2 +- kernel/audit_tree.c | 4 ++-- kernel/audit_watch.c | 14 +++++++------- kernel/auditsc.c | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 72ab759a0b43..f0603092f5cc 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1915,7 +1915,7 @@ void audit_log_link_denied(const char *operation, struct path *link) /* Generate AUDIT_PATH record with object. 
*/ name->type = AUDIT_TYPE_NORMAL; - audit_copy_inode(name, link->dentry, link->dentry->d_inode); + audit_copy_inode(name, link->dentry, d_backing_inode(link->dentry)); audit_log_name(current->audit_context, name, link, 0, NULL); out: kfree(name); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2e0c97427b33..70bd2532437b 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -576,7 +576,7 @@ int audit_remove_tree_rule(struct audit_krule *rule) static int compare_root(struct vfsmount *mnt, void *arg) { - return mnt->mnt_root->d_inode == arg; + return d_backing_inode(mnt->mnt_root) == arg; } void audit_trim_trees(void) @@ -648,7 +648,7 @@ void audit_put_tree(struct audit_tree *tree) static int tag_mount(struct vfsmount *mnt, void *arg) { - return tag_chunk(mnt->mnt_root->d_inode, arg); + return tag_chunk(d_backing_inode(mnt->mnt_root), arg); } /* called with audit_filter_mutex */ diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index ad9c1682f616..6e30024d9aac 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -146,7 +146,7 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev) /* Initialize a parent watch entry. */ static struct audit_parent *audit_init_parent(struct path *path) { - struct inode *inode = path->dentry->d_inode; + struct inode *inode = d_backing_inode(path->dentry); struct audit_parent *parent; int ret; @@ -361,11 +361,11 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - mutex_unlock(&parent->dentry->d_inode->i_mutex); - if (d->d_inode) { + mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex); + if (d_is_positive(d)) { /* update watch filter fields */ - watch->dev = d->d_inode->i_sb->s_dev; - watch->ino = d->d_inode->i_ino; + watch->dev = d_backing_inode(d)->i_sb->s_dev; + watch->ino = d_backing_inode(d)->i_ino; } dput(d); return 0; @@ -426,7 +426,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) return ret; /* either find an old parent or attach a new one */ - parent = audit_find_parent(parent_path.dentry->d_inode); + parent = audit_find_parent(d_backing_inode(parent_path.dentry)); if (!parent) { parent = audit_init_parent(&parent_path); if (IS_ERR(parent)) { @@ -482,7 +482,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, switch (data_type) { case (FSNOTIFY_EVENT_PATH): - inode = ((struct path *)data)->dentry->d_inode; + inode = d_backing_inode(((struct path *)data)->dentry); break; case (FSNOTIFY_EVENT_INODE): inode = (struct inode *)data; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index dc4ae70a7413..9edcf813318c 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1629,7 +1629,7 @@ retry: rcu_read_lock(); seq = read_seqbegin(&rename_lock); for(;;) { - struct inode *inode = d->d_inode; + struct inode *inode = d_backing_inode(d); if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); @@ -1754,7 +1754,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags) { struct audit_context *context = current->audit_context; - const struct inode *inode = dentry->d_inode; + const struct inode *inode = d_backing_inode(dentry); struct audit_names *n; bool parent = flags & AUDIT_INODE_PARENT; @@ -1853,7 +1853,7 @@ void __audit_inode_child(const struct inode *parent, const unsigned char type) { struct audit_context *context = 
current->audit_context; - const struct inode *inode = dentry->d_inode; + const struct inode *inode = d_backing_inode(dentry); const char *dname = dentry->d_name.name; struct audit_names *n, *found_parent = NULL, *found_child = NULL; -- cgit v1.2.3 From 7682c918439d42291df0d76b3e06627f27fbcdef Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 22:26:16 +0000 Subject: VFS: kernel/: d_inode() annotations relayfs and tracefs are dealing with inodes of their own; those two act as filesystem drivers Signed-off-by: David Howells Signed-off-by: Al Viro --- kernel/relay.c | 4 ++-- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace_events.c | 4 ++-- kernel/trace/trace_uprobe.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/relay.c b/kernel/relay.c index 5a56d3c8dc03..e9dbaeb8fd65 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -407,7 +407,7 @@ static inline void relay_set_buf_dentry(struct rchan_buf *buf, struct dentry *dentry) { buf->dentry = dentry; - buf->dentry->d_inode->i_size = buf->early_bytes; + d_inode(buf->dentry)->i_size = buf->early_bytes; } static struct dentry *relay_create_buf_file(struct rchan *chan, @@ -733,7 +733,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) buf->padding[old_subbuf] = buf->prev_padding; buf->subbufs_produced++; if (buf->dentry) - buf->dentry->d_inode->i_size += + d_inode(buf->dentry)->i_size += buf->chan->subbuf_size - buf->padding[old_subbuf]; else diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62c6506d663f..d44901bcd867 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5846,7 +5846,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, struct dentry *ret = trace_create_file(name, mode, parent, data, fops); if (ret) /* See tracing_get_cpu() */ - ret->d_inode->i_cdev = (void *)(cpu + 1); + d_inode(ret)->i_cdev = (void *)(cpu + 1); return ret; } @@ -6420,7 +6420,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry) return -ENOENT; /* The caller did a dget() on dentry */ - mutex_unlock(&dentry->d_inode->i_mutex); + mutex_unlock(&d_inode(dentry)->i_mutex); /* * The inode mutex is locked, but debugfs_create_dir() will also @@ -6435,7 +6435,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry) ret = instance_delete(dentry->d_iname); mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - mutex_lock(&dentry->d_inode->i_mutex); + mutex_lock(&d_inode(dentry)->i_mutex); return ret; } @@ -6453,7 +6453,7 @@ static __init void create_trace_instances(struct dentry *d_tracer) return; /* Hijack the dir inode operations, to allow mkdir */ - trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations; + d_inode(trace_instance_dir)->i_op = &instance_dir_inode_operations; } static void diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index db54dda10ccc..e19e7578a0d3 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -494,8 +494,8 @@ static void remove_event_file_dir(struct ftrace_event_file *file) if (dir) { spin_lock(&dir->d_lock); /* probably unneeded */ list_for_each_entry(child, &dir->d_subdirs, d_child) { - if (child->d_inode) /* probably unneeded */ - child->d_inode->i_private = NULL; + if (d_really_is_positive(child)) /* probably unneeded */ + d_inode(child)->i_private = NULL; } spin_unlock(&dir->d_lock); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7dc1c8abecd6..cb3b37e533cd 100644 --- 
a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -443,7 +443,7 @@ static int create_trace_uprobe(int argc, char **argv) if (ret) goto fail_address_parse; - inode = igrab(path.dentry->d_inode); + inode = igrab(d_inode(path.dentry)); path_put(&path); if (!inode || !S_ISREG(inode->i_mode)) { -- cgit v1.2.3 From ef99b88b16bee753fa51207abdc58ae660453ec6 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 13 Apr 2015 22:30:12 +0200 Subject: tracing: Handle ftrace_dump() atomic context in graph_trace_open() graph_trace_open() can be called in atomic context from ftrace_dump(). Use GFP_ATOMIC for the memory allocations when that's the case, in order to avoid the following splat. BUG: sleeping function called from invalid context at mm/slab.c:2849 in_atomic(): 1, irqs_disabled(): 128, pid: 0, name: swapper/0 Backtrace: .. [<8004dc94>] (__might_sleep) from [<801371f4>] (kmem_cache_alloc_trace+0x160/0x238) r7:87800040 r6:000080d0 r5:810d16e8 r4:000080d0 [<80137094>] (kmem_cache_alloc_trace) from [<800cbd60>] (graph_trace_open+0x30/0xd0) r10:00000100 r9:809171a8 r8:00008e28 r7:810d16f0 r6:00000001 r5:810d16e8 r4:810d16f0 [<800cbd30>] (graph_trace_open) from [<800c79c4>] (trace_init_global_iter+0x50/0x9c) r8:00008e28 r7:808c853c r6:00000001 r5:810d16e8 r4:810d16f0 r3:800cbd30 [<800c7974>] (trace_init_global_iter) from [<800c7aa0>] (ftrace_dump+0x90/0x2ec) r4:810d2580 r3:00000000 [<800c7a10>] (ftrace_dump) from [<80414b2c>] (sysrq_ftrace_dump+0x1c/0x20) r10:00000100 r9:809171a8 r8:808f6e7c r7:00000001 r6:00000007 r5:0000007a r4:808d5394 [<80414b10>] (sysrq_ftrace_dump) from [<800169b8>] (return_to_handler+0x0/0x18) [<80415498>] (__handle_sysrq) from [<800169b8>] (return_to_handler+0x0/0x18) r8:808c8100 r7:808c8444 r6:00000101 r5:00000010 r4:84eb3210 [<80415668>] (handle_sysrq) from [<800169b8>] (return_to_handler+0x0/0x18) [<8042a760>] (pl011_int) from [<800169b8>] (return_to_handler+0x0/0x18) r10:809171bc r9:809171a8 r8:00000001 r7:00000026 r6:808c6000 r5:84f01e60 r4:8454fe00 [<8007782c>] (handle_irq_event_percpu) from [<80077b44>] (handle_irq_event+0x4c/0x6c) r10:808c7ef0 r9:87283e00 r8:00000001 r7:00000000 r6:8454fe00 r5:84f01e60 r4:84f01e00 [<80077af8>] (handle_irq_event) from [<8007aa28>] (handle_fasteoi_irq+0xf0/0x1ac) r6:808f52a4 r5:84f01e60 r4:84f01e00 r3:00000000 [<8007a938>] (handle_fasteoi_irq) from [<80076dc0>] (generic_handle_irq+0x3c/0x4c) r6:00000026 r5:00000000 r4:00000026 r3:8007a938 [<80076d84>] (generic_handle_irq) from [<80077128>] (__handle_domain_irq+0x8c/0xfc) r4:808c1e38 r3:0000002e [<8007709c>] (__handle_domain_irq) from [<800087b8>] (gic_handle_irq+0x34/0x6c) r10:80917748 r9:00000001 r8:88802100 r7:808c7ef0 r6:808c8fb0 r5:00000015 r4:8880210c r3:808c7ef0 [<80008784>] (gic_handle_irq) from [<80014044>] (__irq_svc+0x44/0x7c) Link: http://lkml.kernel.org/r/1428953721-31349-1-git-send-email-rabin@rab.in Link: http://lkml.kernel.org/r/1428957012-2319-1-git-send-email-rabin@rab.in Cc: stable@vger.kernel.org # 3.13+ Signed-off-by: Rabin Vincent Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 2d25ad1526bb..b6fce365ef27 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1309,15 +1309,19 @@ void graph_trace_open(struct trace_iterator *iter) { /* pid and depth on the last trace processed */ struct fgraph_data *data; + gfp_t 
gfpflags; int cpu; iter->private = NULL; - data = kzalloc(sizeof(*data), GFP_KERNEL); + /* We can be called in atomic context via ftrace_dump() */ + gfpflags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; + + data = kzalloc(sizeof(*data), gfpflags); if (!data) goto out_err; - data->cpu_data = alloc_percpu(struct fgraph_cpu_data); + data->cpu_data = alloc_percpu_gfp(struct fgraph_cpu_data, gfpflags); if (!data->cpu_data) goto out_err_free; -- cgit v1.2.3 From 84fce9db4d7eaebd6cb2ee30c15da6d4e4daf846 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 16 Apr 2015 13:44:44 +0900 Subject: tracing: Fix incorrect enabling of trace events by boot cmdline There is a problem that trace events are not properly enabled with boot cmdline. The problem is that if we pass "trace_event=kmem:mm_page_alloc" to the boot cmdline, it enables all kmem trace events, and not just the page_alloc event. This is caused by the parsing mechanism. When we parse the cmdline, the buffer contents are modified due to tokenization. And, if we use this buffer again, we will get the wrong result. Unfortunately, this buffer is accessed three times to set trace events properly at boot time. So, we need to handle this situation. There is already code handling ",", but we need another for ":". This patch adds it. Link: http://lkml.kernel.org/r/1429159484-22977-1-git-send-email-iamjoonsoo.kim@lge.com Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Joonsoo Kim [ added missing return ret; ] Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a576bbe75577..36a957c996c7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -565,6 +565,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { char *event = NULL, *sub = NULL, *match; + int ret; /* * The buf format can be : @@ -590,7 +591,13 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) event = NULL; } - return __ftrace_set_clr_event(tr, match, sub, event, set); + ret = __ftrace_set_clr_event(tr, match, sub, event, set); + + /* Put back the colon to allow this to be called again */ + if (buf) + *(buf - 1) = ':'; + + return ret; } /** -- cgit v1.2.3 From 3193899d4dd54056f8c2e0b1e40dd6e2f0009f28 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 17 Apr 2015 10:27:57 -0400 Subject: tracing: Fix possible out of bounds memory access when parsing enums The code that replaces the enum names with the enum values in the tracepoints' format files could possibly miss the end of string nul character. This was caused by processing things like backslashes, quotes and other tokens. After processing the tokens, a check for the nul character needed to be done before continuing the loop, because the loop incremented the pointer before doing the check, which could bypass the nul character. 
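The underlying pattern is easy to reproduce outside the kernel. The following stand-alone sketch (plain C, not the kernel code) shows why the nul check has to come immediately after each token is skipped, before the outer loop advances the pointer again:

#include <ctype.h>
#include <stdio.h>

/*
 * Skip an identifier-like token. The caller must check for '\0' afterwards;
 * otherwise the next increment steps past the terminator and the following
 * dereference reads out of bounds.
 */
static const char *skip_token(const char *ptr)
{
        do {
                ptr++;
        } while (isalnum((unsigned char)*ptr) || *ptr == '_');
        return ptr;
}

static void scan(const char *fmt)
{
        const char *ptr = fmt;

        while (*ptr) {
                if (isalpha((unsigned char)*ptr) || *ptr == '_') {
                        ptr = skip_token(ptr);
                        if (!*ptr)
                                break;  /* the kind of check the fix adds */
                }
                ptr++;  /* without the break, this can step past '\0' */
        }
        printf("scanned: %s\n", fmt);
}

int main(void)
{
        scan("REC->foo == MY_ENUM");
        return 0;
}
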
Link: http://lkml.kernel.org/r/552E661D.5060502@oracle.com Reported-by: Sasha Levin # via KASan Tested-by: Andrey Ryabinin Fixes: 0c564a538aa9 "tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values" Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 36a957c996c7..b49c107f82ac 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1760,6 +1760,8 @@ static void update_event_printk(struct ftrace_event_call *call, ptr++; /* Check for alpha chars like ULL */ } while (isalnum(*ptr)); + if (!*ptr) + break; /* * A number must have some kind of delimiter after * it, and we can ignore that too. @@ -1786,12 +1788,16 @@ static void update_event_printk(struct ftrace_event_call *call, do { ptr++; } while (isalnum(*ptr) || *ptr == '_'); + if (!*ptr) + break; /* * If what comes after this variable is a '.' or * '->' then we can continue to ignore that string. */ if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { ptr += *ptr == '.' ? 1 : 2; + if (!*ptr) + break; goto skip_more; } /* -- cgit v1.2.3 From 7e01b5acd88b3f3108d8c4ce44e3205d67437202 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 16 Apr 2015 14:47:33 +0200 Subject: kexec: allocate the kexec control page with KEXEC_CONTROL_MEMORY_GFP Introduce KEXEC_CONTROL_MEMORY_GFP to allow the architecture code to override the gfp flags of the allocation for the kexec control page. The loop in kimage_alloc_normal_control_pages allocates pages with GFP_KERNEL until a page is found that happens to have an address smaller than the KEXEC_CONTROL_MEMORY_LIMIT. On systems with a large memory size but a small KEXEC_CONTROL_MEMORY_LIMIT the loop will keep allocating memory until the oom killer steps in. 
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 3 +++ include/linux/kexec.h | 4 ++++ kernel/kexec.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 694bcd6bd927..2f924bc30e35 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -26,6 +26,9 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Allocate control page with GFP_DMA */ +#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA + /* Maximum address we can use for the crash control pages */ #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e60a745ac198..e804306ef5e8 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -40,6 +40,10 @@ #error KEXEC_CONTROL_MEMORY_LIMIT not defined #endif +#ifndef KEXEC_CONTROL_MEMORY_GFP +#define KEXEC_CONTROL_MEMORY_GFP GFP_KERNEL +#endif + #ifndef KEXEC_CONTROL_PAGE_SIZE #error KEXEC_CONTROL_PAGE_SIZE not defined #endif diff --git a/kernel/kexec.c b/kernel/kexec.c index 38c25b1f2fd5..7a36fdcca5bf 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -707,7 +707,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, do { unsigned long pfn, epfn, addr, eaddr; - pages = kimage_alloc_pages(GFP_KERNEL, order); + pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); if (!pages) break; pfn = page_to_pfn(pages); -- cgit v1.2.3 From 10a50f1ab5f06c9a3ee5ece3ec52e607ed53c79f Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Apr 2015 11:14:11 +0300 Subject: genirq: Set IRQCHIP_SKIP_SET_WAKE flag for dummy_irq_chip Without this, system suspend is broken on systems that have drivers calling enable/disable_irq_wake() for interrupts based off the dummy irq hook. (e.g. drivers/gpio/gpio-pcf857x.c) Signed-off-by: Roger Quadros Cc: Cc: Cc: Cc: Gregory Clement Link: http://lkml.kernel.org/r/552E1DD3.4040106@ti.com Signed-off-by: Thomas Gleixner --- kernel/irq/dummychip.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 988dc58e8847..2feb6feca0cc 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -57,5 +57,6 @@ struct irq_chip dummy_irq_chip = { .irq_ack = noop, .irq_mask = noop, .irq_unmask = noop, + .flags = IRQCHIP_SKIP_SET_WAKE, }; EXPORT_SYMBOL_GPL(dummy_irq_chip); -- cgit v1.2.3 From 149aabcc44e3e2c1f8fe4f0832be53d2db55b598 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 10 Apr 2015 12:56:41 +0530 Subject: clockevents: Shutdown detached clockevent device A clockevent device is marked DETACHED when it is replaced by another clockevent device. The device is shut down properly for drivers that implement the legacy ->set_mode() callback, as we call ->set_mode() for CLOCK_EVT_MODE_UNUSED as well. But for the new per-state callback interface, we skip shutting down the device, as we thought it was an internal state change. That wasn't correct. The effect is that the device is left programmed in oneshot or periodic mode. Fall back to 'case CLOCK_EVT_STATE_SHUTDOWN' to shut down the device. 
Fixes: bd624d75db21 "clockevents: Introduce mode specific callbacks" Reported-by: Daniel Lezcano Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/eef0a91c51b74d4e52c8e5a95eca27b5a0563f07.1428650683.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/clockevents.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 25d942d1da27..629be4e21e96 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev, /* Transition with new state-specific callbacks */ switch (state) { case CLOCK_EVT_STATE_DETACHED: - /* - * This is an internal state, which is guaranteed to go from - * SHUTDOWN to DETACHED. No driver interaction required. - */ - return 0; + /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: return dev->set_state_shutdown(dev); -- cgit v1.2.3 From 73459e2a1ada09a68c02cc5b73f3116fc8194b3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Apr 2015 13:20:18 +0200 Subject: x86: pvclock: Really remove the sched notifier for cross-cpu migrations This reverts commits 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 and 80f7fdb1c7f0f9266421f823964fd1962681f6ce. The task migration notifier was originally introduced in order to support the pvclock vsyscall with non-synchronized TSC, but KVM only supports it with synchronized TSC. Hence, on KVM the race condition is only needed due to a bad implementation on the host side, and even then it's so rare that it's mostly theoretical. As far as KVM is concerned it's possible to fix the host, avoiding the additional complexity in the vDSO and the (re)introduction of the task migration notifier. Xen, on the other hand, hasn't yet implemented vsyscall support at all, so we do not care about its plans for non-synchronized TSC. 
Reported-by: Peter Zijlstra Suggested-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 1 - arch/x86/kernel/pvclock.c | 44 ------------------------------------------ arch/x86/vdso/vclock_gettime.c | 34 ++++++++++++++------------------ include/linux/sched.h | 8 -------- kernel/sched/core.c | 15 -------------- 5 files changed, 15 insertions(+), 87 deletions(-) (limited to 'kernel') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 25b1cc07d496..d6b078e9fa28 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20e72dd..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ - if (!pvclock_vdso_info) { - BUG(); - return NULL; - } - - return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ - return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} - #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, - void *v) -{ - struct task_migration_notifier *mn = v; - struct pvclock_vsyscall_time_info *pvti; - - pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - - /* this is NULL when pvclock vsyscall is not initialized */ - if (unlikely(pvti == NULL)) - return NOTIFY_DONE; - - pvti->migrate_count++; - - return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { - .notifier_call = pvclock_task_migrate, -}; - /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - pvclock_vdso_info = i; - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } - - register_task_migration_notifier(&pvclock_migrate); - return 0; } #endif diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 40d2473836c9..9793322751e0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * When looping to get a consistent (time-info, tsc) pair, we - * also need to deal with the possibility we can switch vcpus, - * so make sure we always re-fetch time-info for the current vcpu. + * Note: hypervisor must guarantee that: + * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. + * 2. that per-CPU pvclock time info is updated if the + * underlying CPU changes. + * 3. that version is increased whenever underlying CPU + * changes. 
+ * */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - /* Make sure migrate_count will change if we leave the VCPU. */ - do { - pvti = get_pvti(cpu); - migrate_count = pvti->migrate_count; - - cpu1 = cpu; - cpu = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1)); + pvti = get_pvti(cpu); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * - We must read TSC of pvti's VCPU. - * - KVM doesn't follow the versioning protocol, so data could - * change before version if we left the VCPU. + * We could have been migrated just after the first + * vgetcpu but before fetching the version, so we + * wouldn't notice a version change. */ - smp_rmb(); - } while (unlikely((pvti->pvti.version & 1) || - pvti->pvti.version != version || - pvti->migrate_count != migrate_count)); + cpu1 = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1 || + (pvti->pvti.version & 1) || + pvti->pvti.version != version)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..26a2e6122734 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); -/* Notifier for when a task gets migrated to a new CPU */ -struct task_migration_notifier { - struct task_struct *task; - int from_cpu; - int to_cpu; -}; -extern void register_task_migration_notifier(struct notifier_block *n); - extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f9123a82cbb6..fe22f7510bce 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } -static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); - -void register_task_migration_notifier(struct notifier_block *n) -{ - atomic_notifier_chain_register(&task_migration_notifier, n); -} - #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { - struct task_migration_notifier tmn; - if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); - - tmn.task = p; - tmn.from_cpu = task_cpu(p); - tmn.to_cpu = new_cpu; - - atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); -- cgit v1.2.3 From 876a7ae65b86d8cec8efe7d15d050ac61116874e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 27 Apr 2015 14:40:37 -0700 Subject: bpf: fix 64-bit divide ALU64_DIV instruction should be dividing 64-bit by 64-bit, whereas do_div() does 64-bit by 32-bit divide. x64 and arm64 JITs correctly implement 64 by 64 unsigned divide. llvm BPF backend emits code assuming that ALU64_DIV does 64 by 64. Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") Reported-by: Michael Holzheu Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4139a0f8b558..54f0e7fcd0e2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -357,8 +357,8 @@ select_insn: ALU64_MOD_X: if (unlikely(SRC == 0)) return 0; - tmp = DST; - DST = do_div(tmp, SRC); + div64_u64_rem(DST, SRC, &tmp); + DST = tmp; CONT; ALU_MOD_X: if (unlikely(SRC == 0)) @@ -367,8 +367,8 @@ select_insn: DST = do_div(tmp, (u32) SRC); CONT; ALU64_MOD_K: - tmp = DST; - DST = do_div(tmp, IMM); + div64_u64_rem(DST, IMM, &tmp); + DST = tmp; CONT; ALU_MOD_K: tmp = (u32) DST; @@ -377,7 +377,7 @@ select_insn: ALU64_DIV_X: if (unlikely(SRC == 0)) return 0; - do_div(DST, SRC); + DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: if (unlikely(SRC == 0)) @@ -387,7 +387,7 @@ select_insn: DST = (u32) tmp; CONT; ALU64_DIV_K: - do_div(DST, IMM); + DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: tmp = (u32) DST; -- cgit v1.2.3 From df8d9eeadd0f7a216f2476351d5aee43c6550bf0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 29 Apr 2015 15:19:21 +0200 Subject: cpuidle: Run tick_broadcast_exit() with disabled interrupts Commit 335f49196fd6 (sched/idle: Use explicit broadcast oneshot control function) replaced clockevents_notify() invocations in cpuidle_idle_call() with direct calls to tick_broadcast_enter() and tick_broadcast_exit(), but it overlooked the fact that interrupts were already enabled before calling the latter which led to functional breakage on systems using idle states with the CPUIDLE_FLAG_TIMER_STOP flag set. Fix that by moving the invocations of tick_broadcast_enter() and tick_broadcast_exit() down into cpuidle_enter_state() where interrupts are still disabled when tick_broadcast_exit() is called. Also ensure that interrupts will be disabled before running tick_broadcast_exit() even if they have been enabled by the idle state's ->enter callback. Trigger a WARN_ON_ONCE() in that case, as we generally don't want that to happen for states with CPUIDLE_FLAG_TIMER_STOP set. Fixes: 335f49196fd6 (sched/idle: Use explicit broadcast oneshot control function) Reported-and-tested-by: Linus Walleij Acked-by: Peter Zijlstra (Intel) Acked-by: Daniel Lezcano Reported-and-tested-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 16 ++++++++++++++++ kernel/sched/idle.c | 16 ++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7a73a279e179..61c417b9e53f 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -158,9 +158,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int entered_state; struct cpuidle_state *target_state = &drv->states[index]; + bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); ktime_t time_start, time_end; s64 diff; + /* + * Tell the time framework to switch to a broadcast timer because our + * local timer will be shut down. If a local timer is used from another + * CPU as a broadcast timer, this call may fail if it is not available. 
+ */ + if (broadcast && tick_broadcast_enter()) + return -EBUSY; + trace_cpu_idle_rcuidle(index, dev->cpu); time_start = ktime_get(); @@ -169,6 +178,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, time_end = ktime_get(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + if (broadcast) { + if (WARN_ON_ONCE(!irqs_disabled())) + local_irq_disable(); + + tick_broadcast_exit(); + } + if (!cpuidle_state_is_coupled(dev, drv, entered_state)) local_irq_enable(); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index deef1caa94c6..fefcb1fa5160 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -81,7 +81,6 @@ static void cpuidle_idle_call(void) struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; - unsigned int broadcast; bool reflect; /* @@ -150,17 +149,6 @@ static void cpuidle_idle_call(void) goto exit_idle; } - broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP; - - /* - * Tell the time framework to switch to a broadcast timer - * because our local timer will be shutdown. If a local timer - * is used from another cpu as a broadcast timer, this call may - * fail if it is not available - */ - if (broadcast && tick_broadcast_enter()) - goto use_default; - /* Take note of the planned idle state. */ idle_set_state(this_rq(), &drv->states[next_state]); @@ -174,8 +162,8 @@ static void cpuidle_idle_call(void) /* The cpu is no longer idle or about to enter idle. */ idle_set_state(this_rq(), NULL); - if (broadcast) - tick_broadcast_exit(); + if (entered_state == -EBUSY) + goto use_default; /* * Give the governor an opportunity to reflect on the outcome -- cgit v1.2.3 From 9c4249c8e0221e5cfae758d35b768aee84abf6c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Apr 2015 14:58:43 +0100 Subject: modsign: change default key details Change default key details to be more obviously unspecified. 
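Returning to the cpuidle fix above: the essence of the change is that both tick_broadcast_enter() and tick_broadcast_exit() are now called from cpuidle_enter_state(), where interrupts are known to be disabled, instead of from the idle loop. A condensed sketch of that control flow follows; the time accounting and coupled-state handling of the real function are left out.

#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <linux/irqflags.h>
#include <linux/errno.h>
#include <linux/bug.h>

/* Condensed view of the ordering cpuidle_enter_state() now enforces. */
static int enter_state_sketch(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index)
{
	struct cpuidle_state *target_state = &drv->states[index];
	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
	int entered_state;

	/* The local timer is about to stop: hand over to the broadcast
	 * device while interrupts are still disabled. */
	if (broadcast && tick_broadcast_enter())
		return -EBUSY;

	entered_state = target_state->enter(dev, drv, index);

	if (broadcast) {
		/* The ->enter callback may have re-enabled interrupts;
		 * tick_broadcast_exit() must not run with them on. */
		if (WARN_ON_ONCE(!irqs_disabled()))
			local_irq_disable();

		tick_broadcast_exit();
	}

	return entered_state;
}

The -EBUSY return is what lets the caller in the idle loop fall back to the default idle handler, which is exactly what the sched/idle.c hunk above does with its goto use_default.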
Reported-by: Linus Torvalds Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: Linus Torvalds --- Documentation/module-signing.txt | 6 +++--- kernel/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt index 09c2382ad055..c72702ec1ded 100644 --- a/Documentation/module-signing.txt +++ b/Documentation/module-signing.txt @@ -119,9 +119,9 @@ Most notably, in the x509.genkey file, the req_distinguished_name section should be altered from the default: [ req_distinguished_name ] - O = Magrathea - CN = Glacier signing key - emailAddress = slartibartfast@magrathea.h2g2 + #O = Unspecified company + CN = Build time autogenerated kernel key + #emailAddress = unspecified.user@unspecified.company The generated RSA key size can also be set with: diff --git a/kernel/Makefile b/kernel/Makefile index 0f8f8b0bc1bf..60c302cfb4d3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -197,9 +197,9 @@ x509.genkey: @echo >>x509.genkey "x509_extensions = myexts" @echo >>x509.genkey @echo >>x509.genkey "[ req_distinguished_name ]" - @echo >>x509.genkey "O = Magrathea" - @echo >>x509.genkey "CN = Glacier signing key" - @echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2" + @echo >>x509.genkey "#O = Unspecified company" + @echo >>x509.genkey "CN = Build time autogenerated kernel key" + @echo >>x509.genkey "#emailAddress = unspecified.user@unspecified.company" @echo >>x509.genkey @echo >>x509.genkey "[ myexts ]" @echo >>x509.genkey "basicConstraints=critical,CA:FALSE" -- cgit v1.2.3 From ac01ce1410fc2c7b5f3af5e9c972e6a412eee54f Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Wed, 29 Apr 2015 16:18:46 +0100 Subject: tracing: Make ftrace_print_array_seq compute buf_len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only caller to this function (__print_array) was getting it wrong by passing the array length instead of buffer length. As the element size was already being passed for other reasons it seems reasonable to push the calculation of buffer length into the function. 
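To make the mix-up concrete, here is a small user-space approximation of the helper's new contract (it is not the trace_seq code itself): the caller passes the element count and element size, and the byte length is derived inside the function, so a caller can no longer hand an element count to a parameter that expects a byte length and silently truncate the output.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* User-space stand-in for the patched helper: prints 'count' elements of
 * 'el_size' bytes each, with the byte length computed internally. */
static void print_array_sketch(const void *buf, int count, size_t el_size)
{
	size_t buf_len = count * el_size;	/* bytes, derived here */
	const uint8_t *p = buf;
	size_t off, i;

	printf("{");
	for (off = 0; off < buf_len; off += el_size) {
		uint32_t v = 0;

		/* assemble one element, assuming little-endian layout */
		for (i = 0; i < el_size; i++)
			v |= (uint32_t)p[off + i] << (8 * i);
		printf("%s0x%x", off ? "," : "", (unsigned int)v);
	}
	printf("}\n");
}

int main(void)
{
	uint32_t vals[4] = { 1, 2, 3, 4 };

	/* Before the fix, call sites passed the element count (4) where a
	 * byte length was expected, so only the first element's bytes were
	 * printed.  With the count/el_size contract, all four elements come
	 * out: {0x1,0x2,0x3,0x4} */
	print_array_sketch(vals, 4, sizeof(vals[0]));
	return 0;
}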
Link: http://lkml.kernel.org/r/1430320727-14582-1-git-send-email-alex.bennee@linaro.org Signed-off-by: Alex Bennée Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace_output.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 46e83c2156c6..f9ecf63d47f1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -46,7 +46,7 @@ const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); const char *ftrace_print_array_seq(struct trace_seq *p, - const void *buf, int buf_len, + const void *buf, int count, size_t el_size); struct trace_iterator; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 692bf7184c8c..25a086bcb700 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) EXPORT_SYMBOL(ftrace_print_hex_seq); const char * -ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, +ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size) { const char *ret = trace_seq_buffer_ptr(p); const char *prefix = ""; void *ptr = (void *)buf; + size_t buf_len = count * el_size; trace_seq_putc(p, '{'); -- cgit v1.2.3 From 0782e63bc6fe7e2d3408d250df11d388b7799c6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 May 2015 19:49:49 +0200 Subject: sched: Handle priority boosted tasks proper in setscheduler() Ronny reported that the following scenario is not handled correctly: T1 (prio = 10) lock(rtmutex); T2 (prio = 20) lock(rtmutex) boost T1 T1 (prio = 20) sys_set_scheduler(prio = 30) T1 prio = 30 .... sys_set_scheduler(prio = 10) T1 prio = 30 The last step is wrong as T1 should now be back at prio 20. Commit c365c292d059 ("sched: Consider pi boosting in setscheduler()") only handles the case where a boosted tasks tries to lower its priority. Fix it by taking the new effective priority into account for the decision whether a change of the priority is required. Reported-by: Ronny Meeus Tested-by: Steven Rostedt Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt Cc: Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Mike Galbraith Fixes: c365c292d059 ("sched: Consider pi boosting in setscheduler()") Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1505051806060.4225@nanos Signed-off-by: Ingo Molnar --- include/linux/sched/rt.h | 7 ++++--- kernel/locking/rtmutex.c | 12 +++++++----- kernel/sched/core.c | 26 ++++++++++++++------------ 3 files changed, 25 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 6341f5be6e24..a30b172df6e1 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -18,7 +18,7 @@ static inline int rt_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern int rt_mutex_check_prio(struct task_struct *task, int newprio); +extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) @@ -31,9 +31,10 @@ static inline int rt_mutex_getprio(struct task_struct *p) return p->normal_prio; } -static inline int rt_mutex_check_prio(struct task_struct *task, int newprio) +static inline int rt_mutex_get_effective_prio(struct task_struct *task, + int newprio) { - return 0; + return newprio; } static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b73279367087..b025295f4966 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task) } /* - * Called by sched_setscheduler() to check whether the priority change - * is overruled by a possible priority boosting. + * Called by sched_setscheduler() to get the priority which will be + * effective after the change. */ -int rt_mutex_check_prio(struct task_struct *task, int newprio) +int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) { if (!task_has_pi_waiters(task)) - return 0; + return newprio; - return task_top_pi_waiter(task)->task->prio <= newprio; + if (task_top_pi_waiter(task)->task->prio <= newprio) + return task_top_pi_waiter(task)->task->prio; + return newprio; } /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe22f7510bce..34db9bf892a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3300,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p, /* Actually do priority change: must hold pi & rq lock. */ static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr) + const struct sched_attr *attr, bool keep_boost) { __setscheduler_params(p, attr); /* - * If we get here, there was no pi waiters boosting the - * task. It is safe to use the normal prio. + * Keep a potential priority boosting if called from + * sched_setscheduler(). */ - p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); + else + p->prio = normal_prio(p); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -3408,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p, int newprio = dl_policy(attr->sched_policy) ? 
MAX_DL_PRIO - 1 : MAX_RT_PRIO - 1 - attr->sched_priority; int retval, oldprio, oldpolicy = -1, queued, running; - int policy = attr->sched_policy; + int new_effective_prio, policy = attr->sched_policy; unsigned long flags; const struct sched_class *prev_class; struct rq *rq; @@ -3590,15 +3593,14 @@ change: oldprio = p->prio; /* - * Special case for priority boosted tasks. - * - * If the new priority is lower or equal (user space view) - * than the current (boosted) priority, we just store the new + * Take priority boosted tasks into account. If the new + * effective priority is unchanged, we just store the new * normal parameters and do not touch the scheduler class and * the runqueue. This will be done when the task deboost * itself. */ - if (rt_mutex_check_prio(p, newprio)) { + new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + if (new_effective_prio == oldprio) { __setscheduler_params(p, attr); task_rq_unlock(rq, p, &flags); return 0; @@ -3612,7 +3614,7 @@ change: put_prev_task(rq, p); prev_class = p->sched_class; - __setscheduler(rq, p, attr); + __setscheduler(rq, p, attr, true); if (running) p->sched_class->set_curr_task(rq); @@ -7346,7 +7348,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) queued = task_on_rq_queued(p); if (queued) dequeue_task(rq, p, 0); - __setscheduler(rq, p, &attr); + __setscheduler(rq, p, &attr, false); if (queued) { enqueue_task(rq, p, 0); resched_curr(rq); -- cgit v1.2.3 From 533445c6e53368569e50ab3fb712230c03d523f3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 4 May 2015 03:09:36 -0700 Subject: sched/core: Fix regression in cpuset_cpu_inactive() for suspend Commit 3c18d447b3b3 ("sched/core: Check for available DL bandwidth in cpuset_cpu_inactive()"), a SCHED_DEADLINE bugfix, had a logic error that caused a regression in setting a CPU inactive during suspend. I ran into this when a program was failing pthread_setaffinity_np() with EINVAL after a suspend+wake up. A simple reproducer: $ ./a.out sched_setaffinity: Success $ systemctl suspend $ ./a.out sched_setaffinity: Invalid argument ... where ./a.out is: #define _GNU_SOURCE #include #include #include #include #include #include int main(void) { long num_cores; cpu_set_t cpu_set; int ret; num_cores = sysconf(_SC_NPROCESSORS_ONLN); CPU_ZERO(&cpu_set); CPU_SET(num_cores - 1, &cpu_set); errno = 0; ret = sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set); perror("sched_setaffinity"); return ret ? EXIT_FAILURE : EXIT_SUCCESS; } The mistake is that suspend is handled in the action == CPU_DOWN_PREPARE_FROZEN case of the switch statement in cpuset_cpu_inactive(). However, the commit in question masked out CPU_TASKS_FROZEN from the action, making this case dead. The fix is straightforward. Signed-off-by: Omar Sandoval Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Juri Lelli Cc: Thomas Gleixner Fixes: 3c18d447b3b3 ("sched/core: Check for available DL bandwidth in cpuset_cpu_inactive()") Link: http://lkml.kernel.org/r/1cb5ecb3d6543c38cce5790387f336f54ec8e2bc.1430733960.git.osandov@osandov.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 34db9bf892a3..57bd333bc4ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6999,27 +6999,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, unsigned long flags; long cpu = (long)hcpu; struct dl_bw *dl_b; + bool overflow; + int cpus; - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: - /* explicitly allow suspend */ - if (!(action & CPU_TASKS_FROZEN)) { - bool overflow; - int cpus; - - rcu_read_lock_sched(); - dl_b = dl_bw_of(cpu); + rcu_read_lock_sched(); + dl_b = dl_bw_of(cpu); - raw_spin_lock_irqsave(&dl_b->lock, flags); - cpus = dl_bw_cpus(cpu); - overflow = __dl_overflow(dl_b, cpus, 0, 0); - raw_spin_unlock_irqrestore(&dl_b->lock, flags); + raw_spin_lock_irqsave(&dl_b->lock, flags); + cpus = dl_bw_cpus(cpu); + overflow = __dl_overflow(dl_b, cpus, 0, 0); + raw_spin_unlock_irqrestore(&dl_b->lock, flags); - rcu_read_unlock_sched(); + rcu_read_unlock_sched(); - if (overflow) - return notifier_from_errno(-EBUSY); - } + if (overflow) + return notifier_from_errno(-EBUSY); cpuset_update_active_cpus(false); break; case CPU_DOWN_PREPARE_FROZEN: -- cgit v1.2.3 From 8b10c5e2b59ef2a80a07ab594a3b4987a4676211 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 May 2015 16:08:46 +0200 Subject: perf: Annotate inherited event ctx->mutex recursion While fuzzing Sasha tripped over another ctx->mutex recursion lockdep splat. Annotate this. Reported-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- kernel/events/core.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 81aa3a4ece9f..1a3bf48743ce 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx) * Those places that change perf_event::ctx will hold both * perf_event_ctx::mutex of the 'old' and 'new' ctx value. * - * Lock ordering is by mutex address. There is one other site where - * perf_event_context::mutex nests and that is put_event(). But remember that - * that is a parent<->child context relation, and migration does not affect - * children, therefore these two orderings should not interact. + * Lock ordering is by mutex address. There are two other sites where + * perf_event_context::mutex nests and those are: + * + * - perf_event_exit_task_context() [ child , 0 ] + * __perf_event_exit_task() + * sync_child_event() + * put_event() [ parent, 1 ] + * + * - perf_event_init_context() [ parent, 0 ] + * inherit_task_group() + * inherit_group() + * inherit_event() + * perf_event_alloc() + * perf_init_event() + * perf_try_init_event() [ child , 1 ] + * + * While it appears there is an obvious deadlock here -- the parent and child + * nesting levels are inverted between the two. This is in fact safe because + * life-time rules separate them. 
That is an exiting task cannot fork, and a + * spawning task cannot (yet) exit. + * + * But remember that that these are parent<->child context relations, and + * migration does not affect children, therefore these two orderings should not + * interact. * * The change in perf_event::ctx does not affect children (as claimed above) * because the sys_perf_event_open() case will install a new event and break @@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event) } } -/* - * Called when the last reference to the file is gone. - */ static void put_event(struct perf_event *event) { struct perf_event_context *ctx; @@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_release_kernel); +/* + * Called when the last reference to the file is gone. + */ static int perf_release(struct inode *inode, struct file *file) { put_event(file->private_data); @@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) return -ENODEV; if (event->group_leader != event) { - ctx = perf_event_ctx_lock(event->group_leader); + /* + * This ctx->mutex can nest when we're called through + * inheritance. See the perf_event_ctx_lock_nested() comment. + */ + ctx = perf_event_ctx_lock_nested(event->group_leader, + SINGLE_DEPTH_NESTING); BUG_ON(!ctx); } -- cgit v1.2.3 From ab992dc38f9ae40b3ab996d68449692d464c98cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 May 2015 11:31:50 +0200 Subject: watchdog: Fix merge 'conflict' Two watchdog changes that came through different trees had a non conflicting conflict, that is, one changed the semantics of a variable but no actual code conflict happened. So the merge appeared fine, but the resulting code did not behave as expected. Commit 195daf665a62 ("watchdog: enable the new user interface of the watchdog mechanism") changes the semantics of watchdog_user_enabled, which thereafter is only used by the functions introduced by b3738d293233 ("watchdog: Add watchdog enable/disable all functions"). There further appears to be a distinct lack of serialization between setting and using watchdog_enabled, so perhaps we should wrap the {en,dis}able_all() things in watchdog_proc_mutex. This patch fixes a s2r failure reported by Michal; which I cannot readily explain. But this does make the code internally consistent again. 
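Looking back at the perf annotation above: perf_event_ctx_lock_nested(..., SINGLE_DEPTH_NESTING) is the usual lockdep idiom for telling the validator that two locks of the same class are intentionally taken in a fixed parent-then-child order. A minimal sketch of that idiom follows, with made-up structures standing in for perf event contexts.

#include <linux/mutex.h>
#include <linux/lockdep.h>

/* Illustrative only: two objects whose mutexes share a lock class because
 * they are initialized at the same mutex_init() site, mirroring how parent
 * and child perf event contexts relate. */
struct demo_ctx {
	struct mutex lock;
	struct demo_ctx *parent;
};

static void demo_ctx_init(struct demo_ctx *ctx, struct demo_ctx *parent)
{
	mutex_init(&ctx->lock);		/* one init site, one lock class */
	ctx->parent = parent;
}

static void demo_lock_both(struct demo_ctx *child)
{
	mutex_lock(&child->parent->lock);			/* nesting level 0 */
	mutex_lock_nested(&child->lock, SINGLE_DEPTH_NESTING);	/* nesting level 1 */

	/* ... operate on parent and child under both locks ... */

	mutex_unlock(&child->lock);
	mutex_unlock(&child->parent->lock);
}

Without the _nested annotation, lockdep would flag the inner acquisition as a possible recursive deadlock even though, as the comment added in the patch explains, the lifetime rules make the two orderings safe.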
Reported-and-tested-by: Michal Hocko Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 2316f50b07a4..506edcc500c4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,6 +41,8 @@ #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +static DEFINE_MUTEX(watchdog_proc_mutex); + #ifdef CONFIG_HARDLOCKUP_DETECTOR static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else @@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void) { int cpu; - if (!watchdog_user_enabled) - return; + mutex_lock(&watchdog_proc_mutex); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_enable(cpu); put_online_cpus(); + +unlock: + mutex_lock(&watchdog_proc_mutex); } void watchdog_nmi_disable_all(void) { int cpu; + mutex_lock(&watchdog_proc_mutex); + if (!watchdog_running) - return; + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_disable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } @@ -744,8 +756,6 @@ static int proc_watchdog_update(void) } -static DEFINE_MUTEX(watchdog_proc_mutex); - /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * -- cgit v1.2.3
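As a closing illustration of the eBPF divide fix earlier in this series: do_div() takes a 32-bit divisor (and hands back the remainder through its in-place calling convention), so feeding it a 64-bit divisor silently truncates the divisor, while div64_u64() and div64_u64_rem() operate on full 64-bit operands. The user-space stand-ins below only approximate those semantics; they are not the kernel macros.

#include <stdint.h>
#include <stdio.h>

/* Rough user-space models of the two kernel helpers' semantics. */
static uint32_t do_div_model(uint64_t *n, uint32_t base)	/* 64-by-32 */
{
	uint32_t rem = (uint32_t)(*n % base);

	*n /= base;
	return rem;
}

static uint64_t div64_u64_model(uint64_t dividend, uint64_t divisor)	/* 64-by-64 */
{
	return dividend / divisor;
}

int main(void)
{
	uint64_t dst = 1ULL << 40;
	uint64_t src = (1ULL << 32) + 16;	/* divisor wider than 32 bits */
	uint64_t truncated = dst;

	/* Modelling the old BPF_ALU64 path: the divisor collapses to 16. */
	do_div_model(&truncated, (uint32_t)src);

	printf("64/32 with truncated divisor: %llu\n",
	       (unsigned long long)truncated);			/* 68719476736 */
	printf("64/64 with full divisor:      %llu\n",
	       (unsigned long long)div64_u64_model(dst, src));	/* 255 */
	return 0;
}

That difference is what an LLVM-compiled eBPF program would observe for BPF_ALU64 division with a divisor above 2^32, which is why the interpreter now uses div64_u64()/div64_u64_rem() to match the 64-by-64 behaviour of the x86-64 and arm64 JITs.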