From 606576ce816603d9fe1fb453a88bc6eea16ca709 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:12 -0400 Subject: ftrace: rename FTRACE to FUNCTION_TRACER Due to confusion between the ftrace infrastructure and the gcc profiling tracer "ftrace", this patch renames the config options from FTRACE to FUNCTION_TRACER. The other two names that are offspring from FTRACE DYNAMIC_FTRACE and FTRACE_MCOUNT_RECORD will stay the same. This patch was generated mostly by script, and partially by hand. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-x86/ftrace.h | 4 ++-- include/linux/ftrace.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h index 1bb6f9bbe1a..233bb9b869c 100644 --- a/include/asm-x86/ftrace.h +++ b/include/asm-x86/ftrace.h @@ -1,7 +1,7 @@ #ifndef ASM_X86__FTRACE_H #define ASM_X86__FTRACE_H -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ @@ -19,6 +19,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #endif -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #endif /* ASM_X86__FTRACE_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3d46151be1..0e952958915 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -8,7 +8,7 @@ #include #include -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; extern int @@ -36,12 +36,12 @@ void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); -#else /* !CONFIG_FTRACE */ +#else /* !CONFIG_FUNCTION_TRACER */ # define register_ftrace_function(ops) do { } while (0) # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill_atomic(void) { } -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE # define FTRACE_HASHBITS 10 @@ -101,7 +101,7 @@ void ftrace_kill_atomic(void); static inline void tracer_disable(void) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = 0; #endif } @@ -113,7 +113,7 @@ static inline void tracer_disable(void) */ static inline int __ftrace_enabled_save(void) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER int saved_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; return saved_ftrace_enabled; @@ -124,7 +124,7 @@ static inline int __ftrace_enabled_save(void) static inline void __ftrace_enabled_restore(int enabled) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = enabled; #endif } -- cgit v1.2.3 From 4ce72a2c063a7fa8e42a9435440ae3364115a58d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 22 Oct 2008 15:25:26 +0800 Subject: sched: add CONFIG_SMP consistency a patch from Henrik Austad did this: >> Do not declare select_task_rq as part of sched_class when CONFIG_SMP is >> not set. Peter observed: > While a proper cleanup, could you do it by re-arranging the methods so > as to not create an additional ifdef? Do not declare select_task_rq and some other methods as part of sched_class when CONFIG_SMP is not set. Also gather those methods to avoid CONFIG_SMP mess. Idea-by: Henrik Austad Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Henrik Austad Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 +++++++----- kernel/sched_fair.c | 5 ++--- kernel/sched_idletask.c | 5 ++--- kernel/sched_rt.c | 5 ++--- 4 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f59c8e8597..c05b45faef1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,7 +897,6 @@ struct sched_class { void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); void (*yield_task) (struct rq *rq); - int (*select_task_rq)(struct task_struct *p, int sync); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); @@ -905,6 +904,8 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP + int (*select_task_rq)(struct task_struct *p, int sync); + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, @@ -916,16 +917,17 @@ struct sched_class { void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); -#endif - void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_new) (struct rq *rq, struct task_struct *p); void (*set_cpus_allowed)(struct task_struct *p, const cpumask_t *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); +#endif + + void (*set_curr_task) (struct rq *rq); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); + void (*task_new) (struct rq *rq, struct task_struct *p); void (*switched_from) (struct rq *this_rq, struct task_struct *task, int running); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a0aa38b10fd..8de48a5da35 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1593,9 +1593,6 @@ static const struct sched_class fair_sched_class = { .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_fair, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_wakeup, @@ -1603,6 +1600,8 @@ static const struct sched_class fair_sched_class = { .put_prev_task = put_prev_task_fair, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_fair, + .load_balance = load_balance_fair, .move_one_task = move_one_task_fair, #endif diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index dec4ccabe2f..8a21a2e28c1 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = { /* dequeue is not valid, we print a debug message there: */ .dequeue_task = dequeue_task_idle, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_idle, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_curr_idle, @@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = { .put_prev_task = put_prev_task_idle, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_idle, + .load_balance = load_balance_idle, .move_one_task = move_one_task_idle, #endif diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index cdf5740ab03..c9aa5bede22 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1502,9 +1502,6 @@ static const struct sched_class rt_sched_class = { .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, .yield_task = yield_task_rt, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_rt, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_curr_rt, @@ -1512,6 +1509,8 @@ static const struct sched_class rt_sched_class = { .put_prev_task = put_prev_task_rt, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_rt, + .load_balance = load_balance_rt, .move_one_task = move_one_task_rt, .set_cpus_allowed = set_cpus_allowed_rt, -- cgit v1.2.3 From 593eb8a2d63e95772a5f22d746f18a997c5ee463 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:32:59 -0400 Subject: ftrace: return error on failed modified text. Have the ftrace_modify_code return error values: -EFAULT on error of reading the address -EINVAL if what is read does not match what it expected -EPERM if the write fails to update after a successful match. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 14 +++++++------- include/linux/ftrace.h | 24 ++++++++++++++++++++++-- kernel/trace/ftrace.c | 21 +++++++++++++++------ 3 files changed, 44 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8821ceabf51..428291581cb 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -62,7 +62,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; - int ret; /* * Note: Due to modules and __init, code can @@ -72,15 +71,16 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ - if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) - return 1; + if (__copy_from_user_inatomic(replaced, (char __user *)ip, + MCOUNT_INSN_SIZE)) + return -EFAULT; if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return 2; + return -EINVAL; - ret = __copy_to_user_inatomic((char __user *)ip, new_code, - MCOUNT_INSN_SIZE); - WARN_ON_ONCE(ret); + if (__copy_to_user_inatomic((char __user *)ip, new_code, + MCOUNT_INSN_SIZE)) + return -EPERM; sync_core(); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0e952958915..79fa10cbdcf 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,13 +72,33 @@ extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_mcount_set(unsigned long *data); -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/** + * ftrace_modify_code - modify code segment + * @ip: the address of the code segment + * @old_code: the contents of what is expected to be there + * @new_code: the code to patch in + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code); + extern int skip_trace(unsigned long ip); extern void ftrace_release(void *start, unsigned long size); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1f54a94189f..b2de8de7735 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -596,22 +596,22 @@ ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; unsigned char *nop, *call; - int failed; + int ret; ip = rec->ip; nop = ftrace_nop_replace(); call = ftrace_call_replace(ip, mcount_addr); - failed = ftrace_modify_code(ip, call, nop); - if (failed) { - switch (failed) { - case 1: + ret = ftrace_modify_code(ip, call, nop); + if (ret) { + switch (ret) { + case -EFAULT: WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); print_ip_sym(ip); break; - case 2: + case -EINVAL: WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); @@ -620,6 +620,15 @@ ftrace_code_disable(struct dyn_ftrace *rec) print_ip_ins(" replace: ", nop); printk(KERN_CONT "\n"); break; + case -EPERM: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on writing "); + print_ip_sym(ip); + break; + default: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on unknown error "); + print_ip_sym(ip); } rec->flags |= FTRACE_FL_FAILED; -- cgit v1.2.3 From 81adbdc029ecc416d56563e7f159100181dd711d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:02 -0400 Subject: ftrace: only have ftrace_kill atomic When an anomaly is detected, we need a way to completely disable ftrace. Right now we have two functions: ftrace_kill and ftrace_kill_atomic. The ftrace_kill tries to do it in a "nice" way by converting everything back to a nop. The "nice" way is dangerous itself, so this patch removes it and only has the "atomic" version, which is all that is needed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 42 ++---------------------------------------- kernel/trace/trace.c | 2 +- 3 files changed, 4 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 79fa10cbdcf..ac58e94668b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -40,7 +40,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define register_ftrace_function(ops) do { } while (0) # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) -static inline void ftrace_kill_atomic(void) { } +static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE @@ -117,7 +117,6 @@ static inline void ftrace_release(void *start, unsigned long size) { } /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); -void ftrace_kill_atomic(void); static inline void tracer_disable(void) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b2de8de7735..93245ae046e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1549,22 +1549,6 @@ int ftrace_force_update(void) return ret; } -static void ftrace_force_shutdown(void) -{ - struct task_struct *task; - int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC; - - mutex_lock(&ftraced_lock); - task = ftraced_task; - ftraced_task = NULL; - ftraced_suspend = -1; - ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); - - if (task) - kthread_stop(task); -} - static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1795,17 +1779,16 @@ core_initcall(ftrace_dynamic_init); # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) -# define ftrace_force_shutdown() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** - * ftrace_kill_atomic - kill ftrace from critical sections + * ftrace_kill - kill ftrace * * This function should be used by panic code. It stops ftrace * but in a not so nice way. If you need to simply kill ftrace * from a non-atomic section, use ftrace_kill. */ -void ftrace_kill_atomic(void) +void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; @@ -1815,27 +1798,6 @@ void ftrace_kill_atomic(void) clear_ftrace_function(); } -/** - * ftrace_kill - totally shutdown ftrace - * - * This is a safety measure. If something was detected that seems - * wrong, calling this function will keep ftrace from doing - * any more modifications, and updates. - * used when something went wrong. - */ -void ftrace_kill(void) -{ - mutex_lock(&ftrace_sysctl_lock); - ftrace_disabled = 1; - ftrace_enabled = 0; - - clear_ftrace_function(); - mutex_unlock(&ftrace_sysctl_lock); - - /* Try to totally disable ftrace */ - ftrace_force_shutdown(); -} - /** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aeb2f2505bc..333a5162149 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3097,7 +3097,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ - ftrace_kill_atomic(); + ftrace_kill(); for_each_tracing_cpu(cpu) { atomic_inc(&global_trace.data[cpu]->disabled); -- cgit v1.2.3 From 4d296c24326783bff1282ac72f310d8bac8df413 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:06 -0400 Subject: ftrace: remove mcount set The arch dependent function ftrace_mcount_set was only used by the daemon start up code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/kernel/ftrace.c | 13 ------------- arch/powerpc/kernel/ftrace.c | 17 ----------------- arch/sparc64/kernel/ftrace.c | 18 ------------------ arch/x86/kernel/ftrace.c | 7 ------- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 9 --------- 6 files changed, 65 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 76d50e6091b..6c90479e897 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -95,19 +95,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -int ftrace_mcount_set(unsigned long *data) -{ - unsigned long pc, old; - unsigned long *addr = data; - unsigned char *new; - - pc = (unsigned long)&mcount_call; - memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(pc, *addr); - *addr = ftrace_modify_code(pc, (unsigned char *)&old, new); - return 0; -} - /* run from kstop_machine */ int __init ftrace_dyn_arch_init(void *data) { diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 3855ceb937b..6b75522e8b3 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -126,23 +126,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { /* This is running in kstop_machine */ diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c index 4298d0aee71..447942041a7 100644 --- a/arch/sparc64/kernel/ftrace.c +++ b/arch/sparc64/kernel/ftrace.c @@ -69,24 +69,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(ip, old, new); } -notrace int ftrace_mcount_set(unsigned long *data) -{ - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - - return 0; -} - - int __init ftrace_dyn_arch_init(void *data) { ftrace_mcount_set(data); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index da4fb0deecf..b399eed2353 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -103,13 +103,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - /* mcount is initialized as a nop */ - *data = 0; - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { extern const unsigned char ftrace_test_p6nop[]; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ac58e94668b..1c4835f8691 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -71,7 +71,6 @@ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); -extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e758cab0836..226fd9132d5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -620,7 +620,6 @@ static int ftrace_update_code(void *ignore); static int __ftrace_modify_code(void *data) { - unsigned long addr; int *command = data; if (*command & FTRACE_ENABLE_CALLS) { @@ -639,14 +638,6 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); - if (*command & FTRACE_ENABLE_MCOUNT) { - addr = (unsigned long)ftrace_record_ip; - ftrace_mcount_set(&addr); - } else if (*command & FTRACE_DISABLE_MCOUNT) { - addr = (unsigned long)ftrace_stub; - ftrace_mcount_set(&addr); - } - return 0; } -- cgit v1.2.3 From 08f5ac906d2c0faf96d608c54a0b03177376da8d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:07 -0400 Subject: ftrace: remove ftrace hash The ftrace hash was used by the ftrace_daemon code. The record ip function would place the calling address (ip) into the hash. The daemon would later read the hash and modify that code. The hash complicates the code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 8 +- kernel/trace/ftrace.c | 243 +++++++------------------------------------------ kernel/trace/trace.c | 3 - 3 files changed, 38 insertions(+), 216 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1c4835f8691..703eb53cfa2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -44,8 +44,6 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE -# define FTRACE_HASHBITS 10 -# define FTRACE_HASHSIZE (1< #include #include -#include #include #include @@ -189,9 +188,7 @@ static int ftrace_filtered; static int tracing_on; static int frozen_record_count; -static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; - -static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); +static LIST_HEAD(ftrace_new_addrs); static DEFINE_MUTEX(ftrace_regex_lock); @@ -210,8 +207,6 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static int ftrace_record_suspend; - static struct dyn_ftrace *ftrace_free_records; @@ -242,72 +237,6 @@ static inline int record_frozen(struct dyn_ftrace *rec) # define record_frozen(rec) ({ 0; }) #endif /* CONFIG_KPROBES */ -int skip_trace(unsigned long ip) -{ - unsigned long fl; - struct dyn_ftrace *rec; - struct hlist_node *t; - struct hlist_head *head; - - if (frozen_record_count == 0) - return 0; - - head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)]; - hlist_for_each_entry_rcu(rec, t, head, node) { - if (rec->ip == ip) { - if (record_frozen(rec)) { - if (rec->flags & FTRACE_FL_FAILED) - return 1; - - if (!(rec->flags & FTRACE_FL_CONVERTED)) - return 1; - - if (!tracing_on || !ftrace_enabled) - return 1; - - if (ftrace_filtered) { - fl = rec->flags & (FTRACE_FL_FILTER | - FTRACE_FL_NOTRACE); - if (!fl || (fl & FTRACE_FL_NOTRACE)) - return 1; - } - } - break; - } - } - - return 0; -} - -static inline int -ftrace_ip_in_hash(unsigned long ip, unsigned long key) -{ - struct dyn_ftrace *p; - struct hlist_node *t; - int found = 0; - - hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { - if (p->ip == ip) { - found = 1; - break; - } - } - - return found; -} - -static inline void -ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) -{ - hlist_add_head_rcu(&node->node, &ftrace_hash[key]); -} - -/* called from kstop_machine */ -static inline void ftrace_del_hash(struct dyn_ftrace *node) -{ - hlist_del(&node->node); -} - static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->ip = (unsigned long)ftrace_free_records; @@ -362,69 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) } if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) - return NULL; + if (!ftrace_pages->next) { + /* allocate another page */ + ftrace_pages->next = + (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages->next) + return NULL; + } ftrace_pages = ftrace_pages->next; } return &ftrace_pages->records[ftrace_pages->index++]; } -static void +static struct dyn_ftrace * ftrace_record_ip(unsigned long ip) { - struct dyn_ftrace *node; - unsigned long key; - int resched; - int cpu; + struct dyn_ftrace *rec; if (!ftrace_enabled || ftrace_disabled) - return; - - resched = need_resched(); - preempt_disable_notrace(); - - /* - * We simply need to protect against recursion. - * Use the the raw version of smp_processor_id and not - * __get_cpu_var which can call debug hooks that can - * cause a recursive crash here. - */ - cpu = raw_smp_processor_id(); - per_cpu(ftrace_shutdown_disable_cpu, cpu)++; - if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1) - goto out; - - if (unlikely(ftrace_record_suspend)) - goto out; - - key = hash_long(ip, FTRACE_HASHBITS); - - FTRACE_WARN_ON_ONCE(key >= FTRACE_HASHSIZE); - - if (ftrace_ip_in_hash(ip, key)) - goto out; - - /* This ip may have hit the hash before the lock */ - if (ftrace_ip_in_hash(ip, key)) - goto out; - - node = ftrace_alloc_dyn_node(ip); - if (!node) - goto out; + return NULL; - node->ip = ip; + rec = ftrace_alloc_dyn_node(ip); + if (!rec) + return NULL; - ftrace_add_hash(node, key); + rec->ip = ip; - out: - per_cpu(ftrace_shutdown_disable_cpu, cpu)--; + list_add(&rec->list, &ftrace_new_addrs); - /* prevent recursion with scheduler */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + return rec; } #define FTRACE_ADDR ((long)(ftrace_caller)) @@ -543,7 +439,6 @@ static void ftrace_replace_code(int enable) rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { - ftrace_del_hash(rec); ftrace_free_rec(rec); } } @@ -551,15 +446,6 @@ static void ftrace_replace_code(int enable) } } -static void ftrace_shutdown_replenish(void) -{ - if (ftrace_pages->next) - return; - - /* allocate another page */ - ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); -} - static void print_ip_ins(const char *fmt, unsigned char *p) { int i; @@ -616,18 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } -static int ftrace_update_code(void *ignore); - static int __ftrace_modify_code(void *data) { int *command = data; if (*command & FTRACE_ENABLE_CALLS) { - /* - * Update any recorded ips now that we have the - * machine stopped - */ - ftrace_update_code(NULL); ftrace_replace_code(1); tracing_on = 1; } else if (*command & FTRACE_DISABLE_CALLS) { @@ -738,84 +617,34 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(void *ignore) +static int ftrace_update_code(void) { - int i, save_ftrace_enabled; + struct dyn_ftrace *p, *t; cycle_t start, stop; - struct dyn_ftrace *p; - struct hlist_node *t, *n; - struct hlist_head *head, temp_list; - - /* Don't be recording funcs now */ - ftrace_record_suspend++; - save_ftrace_enabled = ftrace_enabled; - ftrace_enabled = 0; start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; - /* No locks needed, the machine is stopped! */ - for (i = 0; i < FTRACE_HASHSIZE; i++) { - INIT_HLIST_HEAD(&temp_list); - head = &ftrace_hash[i]; + list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { - /* all CPUS are stopped, we are safe to modify code */ - hlist_for_each_entry_safe(p, t, n, head, node) { - /* Skip over failed records which have not been - * freed. */ - if (p->flags & FTRACE_FL_FAILED) - continue; + /* If something went wrong, bail without enabling anything */ + if (unlikely(ftrace_disabled)) + return -1; - /* Unconverted records are always at the head of the - * hash bucket. Once we encounter a converted record, - * simply skip over to the next bucket. Saves ftraced - * some processor cycles (ftrace does its bid for - * global warming :-p ). */ - if (p->flags & (FTRACE_FL_CONVERTED)) - break; + list_del_init(&p->list); - /* Ignore updates to this record's mcount site. - * Reintroduce this record at the head of this - * bucket to attempt to "convert" it again if - * the kprobe on it is unregistered before the - * next run. */ - if (get_kprobe((void *)p->ip)) { - ftrace_del_hash(p); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, &temp_list); - freeze_record(p); - continue; - } else { - unfreeze_record(p); - } - - /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(p)) { - p->flags |= FTRACE_FL_CONVERTED; - ftrace_update_cnt++; - } else { - if ((system_state == SYSTEM_BOOTING) || - !core_kernel_text(p->ip)) { - ftrace_del_hash(p); - ftrace_free_rec(p); - } - } - } - - hlist_for_each_entry_safe(p, t, n, &temp_list, node) { - hlist_del(&p->node); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, head); - } + /* convert record (i.e, patch mcount-call with NOP) */ + if (ftrace_code_disable(p)) { + p->flags |= FTRACE_FL_CONVERTED; + ftrace_update_cnt++; + } else + ftrace_free_rec(p); } stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - ftrace_enabled = save_ftrace_enabled; - ftrace_record_suspend--; - return 0; } @@ -847,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) pg = ftrace_pages = ftrace_pages_start; cnt = num_to_init / ENTRIES_PER_PAGE; - pr_info("ftrace: allocating %ld hash entries in %d pages\n", + pr_info("ftrace: allocating %ld entries in %d pages\n", num_to_init, cnt); for (i = 0; i < cnt; i++) { @@ -1451,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start, unsigned long addr; unsigned long flags; + mutex_lock(&ftrace_start_lock); p = start; while (p < end) { addr = ftrace_call_adjust(*p++); - /* should not be called from interrupt context */ - spin_lock(&ftrace_lock); ftrace_record_ip(addr); - spin_unlock(&ftrace_lock); - ftrace_shutdown_replenish(); } - /* p is ignored */ + /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(p); + ftrace_update_code(); local_irq_restore(flags); + mutex_unlock(&ftrace_start_lock); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 333a5162149..06951e22944 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -865,9 +865,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!ftrace_function_enabled)) return; - if (skip_trace(ip)) - return; - pc = preempt_count(); resched = need_resched(); preempt_disable_notrace(); -- cgit v1.2.3 From 5209f08dc8e5f520ca81b87fa9a7142f58a109f4 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Sat, 25 Oct 2008 11:49:20 +0200 Subject: asm-generic: define DIE_OOPS in asm-generic Impact: build fix DIE_OOPS is now used in the generic trace handling code so it needs to be defined for all architectures. Define it in asm-generic so that it's available to all by default and doesn't cause build errors for architectures that rely on the generic implementation. Signed-off-by: Jonas Bonn Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-generic/kdebug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/kdebug.h b/include/asm-generic/kdebug.h index 2b799c90b2d..11e57b6a85f 100644 --- a/include/asm-generic/kdebug.h +++ b/include/asm-generic/kdebug.h @@ -3,6 +3,7 @@ enum die_val { DIE_UNUSED, + DIE_OOPS=1 }; #endif /* _ASM_GENERIC_KDEBUG_H */ -- cgit v1.2.3 From 3d5afd324a4bf9f64f59599bf1e93cd7dd1dc97a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 27 Oct 2008 12:16:15 +0100 Subject: HID: fix oops during suspend of unbound HID devices Usbhid structure is allocated on start invoked only from probe of some driver. When there is no driver, the structure is null and causes null-dereference oopses. Fix it by allocating the structure on probe and disconnect of the device itself. Also make sure we won't race between start and resume or stop and suspend respectively. References: http://bugzilla.kernel.org/show_bug.cgi?id=11827 Signed-off-by: Jiri Slaby Cc: Johannes Berg Cc: Andreas Schwab Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 58 ++++++++++++++++++++++++++++++------------- drivers/hid/usbhid/usbhid.h | 2 ++ include/linux/hid.h | 1 + 3 files changed, 44 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 42bdd83444c..3b1c489998c 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -776,21 +777,10 @@ static int usbhid_start(struct hid_device *hid) struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev(intf); - struct usbhid_device *usbhid; + struct usbhid_device *usbhid = hid->driver_data; unsigned int n, insize = 0; int ret; - WARN_ON(hid->driver_data); - - usbhid = kzalloc(sizeof(struct usbhid_device), GFP_KERNEL); - if (usbhid == NULL) { - ret = -ENOMEM; - goto err; - } - - hid->driver_data = usbhid; - usbhid->hid = hid; - usbhid->bufsize = HID_MIN_BUFFER_SIZE; hid_find_max_report(hid, HID_INPUT_REPORT, &usbhid->bufsize); hid_find_max_report(hid, HID_OUTPUT_REPORT, &usbhid->bufsize); @@ -804,6 +794,7 @@ static int usbhid_start(struct hid_device *hid) if (insize > HID_MAX_BUFFER_SIZE) insize = HID_MAX_BUFFER_SIZE; + mutex_lock(&usbhid->setup); if (hid_alloc_buffers(dev, hid)) { ret = -ENOMEM; goto fail; @@ -888,6 +879,9 @@ static int usbhid_start(struct hid_device *hid) usbhid_init_reports(hid); hid_dump_device(hid); + set_bit(HID_STARTED, &usbhid->iofl); + mutex_unlock(&usbhid->setup); + return 0; fail: @@ -895,8 +889,7 @@ fail: usb_free_urb(usbhid->urbout); usb_free_urb(usbhid->urbctrl); hid_free_buffers(dev, hid); - kfree(usbhid); -err: + mutex_unlock(&usbhid->setup); return ret; } @@ -907,6 +900,8 @@ static void usbhid_stop(struct hid_device *hid) if (WARN_ON(!usbhid)) return; + mutex_lock(&usbhid->setup); + clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->inlock); /* Sync with error handler */ set_bit(HID_DISCONNECTED, &usbhid->iofl); spin_unlock_irq(&usbhid->inlock); @@ -931,8 +926,7 @@ static void usbhid_stop(struct hid_device *hid) usb_free_urb(usbhid->urbout); hid_free_buffers(hid_to_usb_dev(hid), hid); - kfree(usbhid); - hid->driver_data = NULL; + mutex_unlock(&usbhid->setup); } static struct hid_ll_driver usb_hid_driver = { @@ -947,6 +941,7 @@ static struct hid_ll_driver usb_hid_driver = { static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); + struct usbhid_device *usbhid; struct hid_device *hid; size_t len; int ret; @@ -1000,14 +995,26 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id) if (usb_string(dev, dev->descriptor.iSerialNumber, hid->uniq, 64) <= 0) hid->uniq[0] = 0; + usbhid = kzalloc(sizeof(*usbhid), GFP_KERNEL); + if (usbhid == NULL) { + ret = -ENOMEM; + goto err; + } + + hid->driver_data = usbhid; + usbhid->hid = hid; + mutex_init(&usbhid->setup); /* needed on suspend/resume */ + ret = hid_add_device(hid); if (ret) { if (ret != -ENODEV) dev_err(&intf->dev, "can't add hid device: %d\n", ret); - goto err; + goto err_free; } return 0; +err_free: + kfree(usbhid); err: hid_destroy_device(hid); return ret; @@ -1016,11 +1023,14 @@ err: static void hid_disconnect(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); + struct usbhid_device *usbhid; if (WARN_ON(!hid)) return; + usbhid = hid->driver_data; hid_destroy_device(hid); + kfree(usbhid); } static int hid_suspend(struct usb_interface *intf, pm_message_t message) @@ -1028,11 +1038,18 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message) struct hid_device *hid = usb_get_intfdata (intf); struct usbhid_device *usbhid = hid->driver_data; + mutex_lock(&usbhid->setup); + if (!test_bit(HID_STARTED, &usbhid->iofl)) { + mutex_unlock(&usbhid->setup); + return 0; + } + spin_lock_irq(&usbhid->inlock); /* Sync with error handler */ set_bit(HID_SUSPENDED, &usbhid->iofl); spin_unlock_irq(&usbhid->inlock); del_timer(&usbhid->io_retry); usb_kill_urb(usbhid->urbin); + mutex_unlock(&usbhid->setup); dev_dbg(&intf->dev, "suspend\n"); return 0; } @@ -1043,9 +1060,16 @@ static int hid_resume(struct usb_interface *intf) struct usbhid_device *usbhid = hid->driver_data; int status; + mutex_lock(&usbhid->setup); + if (!test_bit(HID_STARTED, &usbhid->iofl)) { + mutex_unlock(&usbhid->setup); + return 0; + } + clear_bit(HID_SUSPENDED, &usbhid->iofl); usbhid->retry_delay = 0; status = hid_start_in(hid); + mutex_unlock(&usbhid->setup); dev_dbg(&intf->dev, "resume status %d\n", status); return status; } diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h index abedb13c623..55973ff5400 100644 --- a/drivers/hid/usbhid/usbhid.h +++ b/drivers/hid/usbhid/usbhid.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ struct usbhid_device { dma_addr_t outbuf_dma; /* Output buffer dma */ spinlock_t outlock; /* Output fifo spinlock */ + struct mutex setup; unsigned long iofl; /* I/O flags (CTRL_RUNNING, OUT_RUNNING) */ struct timer_list io_retry; /* Retry timer */ unsigned long stop_retry; /* Time to give up, in jiffies */ diff --git a/include/linux/hid.h b/include/linux/hid.h index 5355ca4b939..e5780f8c934 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -410,6 +410,7 @@ struct hid_output_fifo { #define HID_SUSPENDED 5 #define HID_CLEAR_HALT 6 #define HID_DISCONNECTED 7 +#define HID_STARTED 8 struct hid_input { struct list_head list; -- cgit v1.2.3 From 3070f69b66b7ab2f02d8a2500edae07039c38508 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Oct 2008 19:20:59 +0100 Subject: scsi: make sure that scsi_init_shared_tag_map() doesn't overwrite existing map Right now callers have to check whether scsi_host->bqt is already set up, it's much cleaner to just have scsi_init_shared_tag_map() does this check on its own. Signed-off-by: Jens Axboe --- include/scsi/scsi_tcq.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index cf4c219c0b5..17231385cb3 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -140,8 +140,18 @@ static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag) */ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth) { - shost->bqt = blk_init_tags(depth); - return shost->bqt ? 0 : -ENOMEM; + /* + * If the shared tag map isn't already initialized, do it now. + * This saves callers from having to check ->bqt when setting up + * devices on the shared host (for libata) + */ + if (!shost->bqt) { + shost->bqt = blk_init_tags(depth); + if (!shost->bqt) + return -ENOMEM; + } + + return 0; } /** -- cgit v1.2.3 From ea2d8b59bc7b770fde03de2bb9b3ee46e8bdc8d5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 27 Oct 2008 09:47:03 -0700 Subject: mac80211.h: fix kernel-doc excesses Fix mac80211.h kernel-doc: it had some extra parameters that were no longer valid and incorrect format for a return value in 2 places. Warning(lin2628-rc2//include/net/mac80211.h:1487): Excess function parameter or struct member 'control' description in 'ieee80211_beacon_get' Warning(lin2628-rc2//include/net/mac80211.h:1596): Excess function parameter or struct member 'control' description in 'ieee80211_get_buffered_bc' Warning(lin2628-rc2//include/net/mac80211.h:1632): Excess function parameter or struct member 'rc4key' description in 'ieee80211_get_tkip_key' Warning(lin2628-rc2//include/net/mac80211.h:1735): Excess function parameter or struct member 'return' description in 'ieee80211_start_tx_ba_session' Warning(lin2628-rc2//include/net/mac80211.h:1775): Excess function parameter or struct member 'return' description in 'ieee80211_stop_tx_ba_session' Signed-off-by: Randy Dunlap Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d861197f83c..8856e2d60e9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1474,7 +1474,6 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, * ieee80211_beacon_get - beacon generation function * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. - * @control: will be filled with information needed to send this beacon. * * If the beacon frames are generated by the host system (i.e., not in * hardware/firmware), the low-level driver uses this function to receive @@ -1575,7 +1574,6 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, * ieee80211_get_buffered_bc - accessing buffered broadcast and multicast frames * @hw: pointer as obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. - * @control: will be filled with information needed to send returned frame. * * Function for accessing buffered broadcast and multicast frames. If * hardware/firmware does not implement buffering of broadcast/multicast @@ -1623,9 +1621,8 @@ unsigned int ieee80211_hdrlen(__le16 fc); * * @keyconf: the parameter passed with the set key * @skb: the skb for which the key is needed - * @rc4key: a buffer to which the key will be written * @type: TBD - * @key: TBD + * @key: a buffer to which the key will be written */ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, struct sk_buff *skb, @@ -1726,7 +1723,8 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * @hw: pointer as obtained from ieee80211_alloc_hw(). * @ra: receiver address of the BA session recipient * @tid: the TID to BA on. - * @return: success if addBA request was sent, failure otherwise + * + * Return: success if addBA request was sent, failure otherwise * * Although mac80211/low level driver/user space application can estimate * the need to start aggregation on a certain RA/TID, the session level @@ -1764,7 +1762,8 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, * @ra: receiver address of the BA session recipient * @tid: the TID to stop BA. * @initiator: if indicates initiator DELBA frame will be sent. - * @return: error if no sta with matching da found, success otherwise + * + * Return: error if no sta with matching da found, success otherwise * * Although mac80211/low level driver/user space application can estimate * the need to stop aggregation on a certain RA/TID, the session level -- cgit v1.2.3 From 5550af4df179e52753d3a43a788a113ad8cd95cd Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 15 Oct 2008 20:15:06 +0800 Subject: KVM: Fix guest shared interrupt with in-kernel irqchip Every call of kvm_set_irq() should offer an irq_source_id, which is allocated by kvm_request_irq_source_id(). Based on irq_source_id, we identify the irq source and implement logical OR for shared level interrupts. The allocated irq_source_id can be freed by kvm_free_irq_source_id(). Currently, we support at most sizeof(unsigned long) different irq sources. [Amit: - rebase to kvm.git HEAD - move definition of KVM_USERSPACE_IRQ_SOURCE_ID to common file - move kvm_request_irq_source_id to the update_irq ioctl] [Xiantao: - Add kvm/ia64 stuff and make it work for kvm/ia64 guests] Signed-off-by: Sheng Yang Signed-off-by: Amit Shah Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 3 +++ arch/ia64/kvm/kvm-ia64.c | 8 +++++--- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/i8254.c | 11 +++++++++-- arch/x86/kvm/i8254.h | 1 + arch/x86/kvm/x86.c | 6 +++++- include/linux/kvm_host.h | 7 ++++++- virt/kvm/irq_comm.c | 42 +++++++++++++++++++++++++++++++++++++--- virt/kvm/kvm_main.c | 12 ++++++++---- 9 files changed, 79 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 85db124d37f..04c0b88f7b3 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -417,6 +417,9 @@ struct kvm_arch { struct list_head assigned_dev_head; struct dmar_domain *intel_iommu_domain; struct hlist_head irq_ack_notifier_list; + + unsigned long irq_sources_bitmap; + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; }; union cpuid3_t { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index a312c9e9b9e..8a2b13ff0af 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -778,6 +778,9 @@ static void kvm_init_vm(struct kvm *kvm) kvm_build_io_pmt(kvm); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); } struct kvm *kvm_arch_create_vm(void) @@ -941,9 +944,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { mutex_lock(&kvm->lock); - kvm_ioapic_set_irq(kvm->arch.vioapic, - irq_event.irq, - irq_event.level); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); r = 0; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 65679d00633..8346be87cfa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -364,6 +364,9 @@ struct kvm_arch{ struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; + + unsigned long irq_sources_bitmap; + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 11c6725fb79..8772dc94682 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -545,6 +545,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) if (!pit) return NULL; + mutex_lock(&kvm->lock); + pit->irq_source_id = kvm_request_irq_source_id(kvm); + mutex_unlock(&kvm->lock); + if (pit->irq_source_id < 0) + return NULL; + mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); spin_lock_init(&pit->pit_state.inject_lock); @@ -587,6 +593,7 @@ void kvm_free_pit(struct kvm *kvm) mutex_lock(&kvm->arch.vpit->pit_state.lock); timer = &kvm->arch.vpit->pit_state.pit_timer.timer; hrtimer_cancel(timer); + kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); mutex_unlock(&kvm->arch.vpit->pit_state.lock); kfree(kvm->arch.vpit); } @@ -595,8 +602,8 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, 0, 1); - kvm_set_irq(kvm, 0, 0); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index e436d4983aa..4178022b97a 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -44,6 +44,7 @@ struct kvm_pit { struct kvm_io_device speaker_dev; struct kvm *kvm; struct kvm_kpit_state pit_state; + int irq_source_id; }; #define KVM_PIT_BASE_ADDRESS 0x40 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4f0677d1eae..f1f8ff2f1fa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1742,7 +1742,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, irq_event.irq, irq_event.level); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); r = 0; } @@ -4013,6 +4014,9 @@ struct kvm *kvm_arch_create_vm(void) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + return kvm; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3833c48fae3..bb92be2153b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -37,6 +37,8 @@ #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 +#define KVM_USERSPACE_IRQ_SOURCE_ID 0 + struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; @@ -306,15 +308,18 @@ struct kvm_assigned_dev_kernel { int host_irq; int guest_irq; int irq_requested; + int irq_source_id; struct pci_dev *dev; struct kvm *kvm; }; -void kvm_set_irq(struct kvm *kvm, int irq, int level); +void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); +int kvm_request_irq_source_id(struct kvm *kvm); +void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index d0169f5e604..55ad76ee2d0 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -25,15 +25,23 @@ #include "ioapic.h" /* This should be called with the kvm->lock mutex held */ -void kvm_set_irq(struct kvm *kvm, int irq, int level) +void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { + unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; + + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level); + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state)); #ifdef CONFIG_X86 - kvm_pic_set_irq(pic_irqchip(kvm), irq, level); + kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state)); #endif } @@ -58,3 +66,31 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, { hlist_del(&kian->link); } + +/* The caller must hold kvm->lock mutex */ +int kvm_request_irq_source_id(struct kvm *kvm) +{ + unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; + int irq_source_id = find_first_zero_bit(bitmap, + sizeof(kvm->arch.irq_sources_bitmap)); + if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { + printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); + irq_source_id = -EFAULT; + } else + set_bit(irq_source_id, bitmap); + return irq_source_id; +} + +void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) +{ + int i; + + if (irq_source_id <= 0 || + irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { + printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); + return; + } + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) + clear_bit(irq_source_id, &kvm->arch.irq_states[i]); + clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cf0ab8ed384..a87f45edfae 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -105,14 +105,12 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) */ mutex_lock(&assigned_dev->kvm->lock); kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, assigned_dev->guest_irq, 1); mutex_unlock(&assigned_dev->kvm->lock); kvm_put_kvm(assigned_dev->kvm); } -/* FIXME: Implement the OR logic needed to make shared interrupts on - * this line behave properly - */ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = @@ -134,7 +132,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) dev = container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); - kvm_set_irq(dev->kvm, dev->guest_irq, 0); + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); enable_irq(dev->host_irq); } @@ -146,6 +144,7 @@ static void kvm_free_assigned_device(struct kvm *kvm, free_irq(assigned_dev->host_irq, (void *)assigned_dev); kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); if (cancel_work_sync(&assigned_dev->interrupt_work)) /* We had pending work. That means we will have to take @@ -215,6 +214,11 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, match->ack_notifier.gsi = assigned_irq->guest_irq; match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); + r = kvm_request_irq_source_id(kvm); + if (r < 0) + goto out_release; + else + match->irq_source_id = r; /* Even though this is PCI, we don't want to use shared * interrupts. Sharing host devices with guest-assigned devices -- cgit v1.2.3 From bb45e202e695dea8657bb03a01d1522c37558672 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 19 Oct 2008 16:39:45 +0200 Subject: KVM: Future-proof device assignment ABI Reserve some space so we can add more data. Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 797fcd78124..f18b86fa865 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -489,6 +489,9 @@ struct kvm_assigned_pci_dev { __u32 busnr; __u32 devfn; __u32 flags; + union { + __u32 reserved[12]; + }; }; struct kvm_assigned_irq { @@ -496,6 +499,9 @@ struct kvm_assigned_irq { __u32 host_irq; __u32 guest_irq; __u32 flags; + union { + __u32 reserved[12]; + }; }; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 8175fe2dda1c93a9c596921c8ed4a0b4baccdefe Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sun, 26 Oct 2008 00:30:18 +0200 Subject: HID: fix hid_device_id for cross compiling struct hid_device_id contains hidden padding which is bad for cross compiling. Make the padding explicit and consistent across architectures. Signed-off-by: Andreas Schwab Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index eb71b45fdf5..97b91d1abb4 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -135,6 +135,7 @@ struct usb_device_id { struct hid_device_id { __u16 bus; + __u16 pad1; __u32 vendor; __u32 product; kernel_ulong_t driver_data -- cgit v1.2.3 From 3f5e26cee443eb4d3900cd3085664c3e51b72135 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Oct 2008 15:02:51 -0700 Subject: adjust init section definitions Add rodata equivalents for assembly use, and fix the section attributes used by __REFCONST. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- include/linux/init.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index 0c1264668be..68cb0265d00 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -112,21 +112,25 @@ #define __FINIT .previous #define __INITDATA .section ".init.data","aw" +#define __INITRODATA .section ".init.rodata","a" #define __FINITDATA .previous #define __DEVINIT .section ".devinit.text", "ax" #define __DEVINITDATA .section ".devinit.data", "aw" +#define __DEVINITRODATA .section ".devinit.rodata", "a" #define __CPUINIT .section ".cpuinit.text", "ax" #define __CPUINITDATA .section ".cpuinit.data", "aw" +#define __CPUINITRODATA .section ".cpuinit.rodata", "a" #define __MEMINIT .section ".meminit.text", "ax" #define __MEMINITDATA .section ".meminit.data", "aw" +#define __MEMINITRODATA .section ".meminit.rodata", "a" /* silence warnings when references are OK */ #define __REF .section ".ref.text", "ax" #define __REFDATA .section ".ref.data", "aw" -#define __REFCONST .section ".ref.rodata", "aw" +#define __REFCONST .section ".ref.rodata", "a" #ifndef __ASSEMBLY__ /* -- cgit v1.2.3 From 762b8df748d83c14070afbf0c6f8c0f4a91a13bf Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Oct 2008 12:37:08 +0000 Subject: ALSA: ASoC: Fix mono controls after conversion to support full int masks When ASoC was converted to support full int width masks SOC_SINGLE_VALUE() omitted the assignment of rshift, causing the control operatins to report some mono controls as stereo. This happened to work some of the time due to a confusion between shift and min in snd_soc_info_volsw(). Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/sound/soc.h | 3 ++- sound/soc/soc-core.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index a1e0357a84d..5e0189876af 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -28,7 +28,8 @@ */ #define SOC_SINGLE_VALUE(xreg, xshift, xmax, xinvert) \ ((unsigned long)&(struct soc_mixer_control) \ - {.reg = xreg, .shift = xshift, .max = xmax, .invert = xinvert}) + {.reg = xreg, .shift = xshift, .rshift = xshift, .max = xmax, \ + .invert = xinvert}) #define SOC_SINGLE_VALUE_EXT(xreg, xmax, xinvert) \ ((unsigned long)&(struct soc_mixer_control) \ {.reg = xreg, .max = xmax, .invert = xinvert}) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 462e635dfc7..a3adbf06b1e 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1462,7 +1462,7 @@ int snd_soc_info_volsw(struct snd_kcontrol *kcontrol, struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; int max = mc->max; - unsigned int shift = mc->min; + unsigned int shift = mc->shift; unsigned int rshift = mc->rshift; if (max == 1) -- cgit v1.2.3 From 0833422274ff00729a603b020fac297e69a03e40 Mon Sep 17 00:00:00 2001 From: Kurt Garloff Date: Wed, 29 Oct 2008 14:00:48 -0700 Subject: mm: increase the default mlock limit from 32k to 64k By default, non-privileged tasks can only mlock() a small amount of memory to avoid a DoS attack by ordinary users. The Linux kernel defaulted to 32k (on a 4k page size system) to accommodate the needs of gpg. However, newer gpg2 needs 64k in various circumstances and otherwise fails miserably, see bnc#329675. Change the default to 64k, and make it more agnostic to PAGE_SIZE. Signed-off-by: Kurt Garloff Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/resource.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/resource.h b/include/linux/resource.h index aaa423a6f3d..40fc7e62608 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -59,10 +59,10 @@ struct rlimit { #define _STK_LIM (8*1024*1024) /* - * GPG wants 32kB of mlocked memory, to make sure pass phrases + * GPG2 wants 64kB of mlocked memory, to make sure pass phrases * and other sensitive information are never written to disk. */ -#define MLOCK_LIMIT (8 * PAGE_SIZE) +#define MLOCK_LIMIT ((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024) /* * Due to binary compatibility, the actual resource numbers -- cgit v1.2.3 From 00c2e63c31d0f431952ff2a671c5c6997dd4f8b2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:53 -0700 Subject: freezer_cg: use thaw_process() in unfreeze_cgroup() Don't duplicate the implementation of thaw_process(). [akpm@linux-foundation.org: make __thaw_process() static] Signed-off-by: Li Zefan Cc: Cedric Le Goater Acked-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 5 ----- kernel/cgroup_freezer.c | 15 ++++----------- kernel/freezer.c | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 8f225339eee..5a361f85cfe 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -44,11 +44,6 @@ static inline bool should_send_signal(struct task_struct *p) return !(p->flags & PF_FREEZER_NOSIG); } -/* - * Wake up a frozen process - */ -extern int __thaw_process(struct task_struct *p); - /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e9c856a265c..5e6d26b66e8 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -275,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) return num_cant_freeze_now ? -EBUSY : 0; } -static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) { struct cgroup_iter it; struct task_struct *task; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { - int do_wake; - - task_lock(task); - do_wake = __thaw_process(task); - task_unlock(task); - if (do_wake) - wake_up_process(task); + thaw_process(task); } cgroup_iter_end(cgroup, &it); - freezer->state = CGROUP_THAWED; - return 0; + freezer->state = CGROUP_THAWED; } static int freezer_change_state(struct cgroup *cgroup, @@ -320,7 +313,7 @@ static int freezer_change_state(struct cgroup *cgroup, } /* state == FREEZING and goal_state == THAWED, so unfreeze */ case CGROUP_FROZEN: - retval = unfreeze_cgroup(cgroup, freezer); + unfreeze_cgroup(cgroup, freezer); break; default: break; diff --git a/kernel/freezer.c b/kernel/freezer.c index ba6248b323e..2f4936cf708 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p) } } -/* - * Wake up a frozen process - * - * task_lock() is needed to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails. Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. - */ -int __thaw_process(struct task_struct *p) +static int __thaw_process(struct task_struct *p) { if (frozen(p)) { p->flags &= ~PF_FROZEN; @@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p) return 0; } +/* + * Wake up a frozen process + * + * task_lock() is needed to prevent the race with refrigerator() which may + * occur if the freezing of tasks fails. Namely, without the lock, if the + * freezing of tasks failed, thaw_tasks() might have run before a task in + * refrigerator() could call frozen_process(), in which case the task would be + * frozen and no one would thaw it. + */ int thaw_process(struct task_struct *p) { task_lock(p); -- cgit v1.2.3 From 9b913735e53ab0da4a792bac0de8e178cc13dcfb Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:54 -0700 Subject: cgroups: tiny cleanups - remove 'private' field from struct subsys - remove cgroup_init_smp() Signed-off-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8b00f6643e9..1164963c3a8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -25,7 +25,6 @@ struct cgroup; extern int cgroup_init_early(void); extern int cgroup_init(void); -extern void cgroup_init_smp(void); extern void cgroup_lock(void); extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern void cgroup_unlock(void); @@ -348,8 +347,6 @@ struct cgroup_subsys { struct cgroupfs_root *root; struct list_head sibling; - - void *private; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; @@ -410,7 +407,6 @@ void cgroup_mm_owner_callbacks(struct task_struct *old, static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } -static inline void cgroup_init_smp(void) {} static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_fork_callbacks(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} -- cgit v1.2.3 From 4e02ed4b4a2fae34aae766a5bb93ae235f60adb8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 29 Oct 2008 14:00:55 -0700 Subject: fs: remove prepare_write/commit_write Nothing uses prepare_write or commit_write. Remove them from the tree completely. [akpm@linux-foundation.org: schedule simple_prepare_write() for unexporting] Signed-off-by: Nick Piggin Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 12 +- Documentation/filesystems/vfs.txt | 39 +----- drivers/block/loop.c | 5 +- fs/fat/inode.c | 2 +- fs/libfs.c | 2 +- fs/ocfs2/file.c | 3 +- fs/splice.c | 4 +- include/linux/fs.h | 7 -- mm/filemap.c | 242 +------------------------------------- 9 files changed, 23 insertions(+), 293 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8362860e21a..23d2f4460de 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -161,8 +161,12 @@ prototypes: int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + int (*write_begin)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + int (*write_end)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); @@ -180,8 +184,6 @@ sync_page: no maybe writepages: no set_page_dirty no no readpages: no -prepare_write: no yes yes -commit_write: no yes yes write_begin: no locks the page yes write_end: no yes, unlocks yes perform_write: no n/a yes @@ -191,7 +193,7 @@ releasepage: no yes direct_IO: no launder_page: no yes - ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage() + ->write_begin(), ->write_end(), ->sync_page() and ->readpage() may be called from the request handler (/dev/loop). ->readpage() unlocks the page, either synchronously or via I/O diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c4d348dabe9..5579bda58a6 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -492,7 +492,7 @@ written-back to storage typically in whole pages, however the address_space has finer control of write sizes. The read process essentially only requires 'readpage'. The write -process is more complicated and uses prepare_write/commit_write or +process is more complicated and uses write_begin/write_end or set_page_dirty to write data into the address_space, and writepage, sync_page, and writepages to writeback data to storage. @@ -521,8 +521,6 @@ struct address_space_operations { int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); @@ -598,37 +596,7 @@ struct address_space_operations { readpages is only used for read-ahead, so read errors are ignored. If anything goes wrong, feel free to give up. - prepare_write: called by the generic write path in VM to set up a write - request for a page. This indicates to the address space that - the given range of bytes is about to be written. The - address_space should check that the write will be able to - complete, by allocating space if necessary and doing any other - internal housekeeping. If the write will update parts of - any basic-blocks on storage, then those blocks should be - pre-read (if they haven't been read already) so that the - updated blocks can be written out properly. - The page will be locked. - - Note: the page _must not_ be marked uptodate in this function - (or anywhere else) unless it actually is uptodate right now. As - soon as a page is marked uptodate, it is possible for a concurrent - read(2) to copy it to userspace. - - commit_write: If prepare_write succeeds, new data will be copied - into the page and then commit_write will be called. It will - typically update the size of the file (if appropriate) and - mark the inode as dirty, and do any other related housekeeping - operations. It should avoid returning an error if possible - - errors should have been handled by prepare_write. - - write_begin: This is intended as a replacement for prepare_write. The - key differences being that: - - it returns a locked page (in *pagep) rather than being - given a pre locked page; - - it must be able to cope with short writes (where the - length passed to write_begin is greater than the number - of bytes copied into the page). - + write_begin: Called by the generic buffered write code to ask the filesystem to prepare to write len bytes at the given offset in the file. The address_space should check that the write will be able to complete, @@ -640,6 +608,9 @@ struct address_space_operations { The filesystem must return the locked pagecache page for the specified offset, in *pagep, for the caller to write into. + It must be able to cope with short writes (where the length passed to + write_begin is greater than the number of bytes copied into the page). + flags is a field for AOP_FLAG_xxx flags, described in include/linux/fs.h. diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3f09cd8bcc3..5c4ee70d5cf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -40,8 +40,7 @@ * Heinz Mauelshagen , Feb 2002 * * Support for falling back on the write file operation when the address space - * operations prepare_write and/or commit_write are not available on the - * backing filesystem. + * operations write_begin is not available on the backing filesystem. * Anton Altaparmakov, 16 Feb 2005 * * Still To Fix: @@ -765,7 +764,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, */ if (!file->f_op->splice_read) goto out_putf; - if (aops->prepare_write || aops->write_begin) + if (aops->write_begin) lo_flags |= LO_FLAGS_USE_AOPS; if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 19eafbe3c37..2b2eec1283b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -175,7 +175,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { /* - * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. * * But we must fill the remaining area or hole by nul for diff --git a/fs/libfs.c b/fs/libfs.c index 74688598bcf..e960a832190 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -814,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr); EXPORT_SYMBOL(simple_link); EXPORT_SYMBOL(simple_lookup); EXPORT_SYMBOL(simple_pin_fs); -EXPORT_SYMBOL(simple_prepare_write); +EXPORT_UNUSED_SYMBOL(simple_prepare_write); EXPORT_SYMBOL(simple_readpage); EXPORT_SYMBOL(simple_release_fs); EXPORT_SYMBOL(simple_rename); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8d3225a7807..7efe937a415 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -679,8 +679,7 @@ leave: /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to - * worry about recursive locking in ->prepare_write() and - * ->commit_write(). */ + * worry about recursive locking in ->write_begin() and ->write_end(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 size) { diff --git a/fs/splice.c b/fs/splice.c index a1e701c2715..1abab5cee4b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, }; /* - * The actor worker might be calling ->prepare_write and - * ->commit_write. Most of the time, these expect i_mutex to + * The actor worker might be calling ->write_begin and + * ->write_end. Most of the time, these expect i_mutex to * be held. Since this may result in an ABBA deadlock with * pipe->inode, we have to order lock acquiry here. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b248d61430..0dcdd9458f4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -489,13 +489,6 @@ struct address_space_operations { int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - /* - * ext3 requires that a successful prepare_write() call be followed - * by a commit_write() call - they must be balanced - */ - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); - int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); diff --git a/mm/filemap.c b/mm/filemap.c index ab8553658af..f3e5f8944d1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping, { const struct address_space_operations *aops = mapping->a_ops; - if (aops->write_begin) { - return aops->write_begin(file, mapping, pos, len, flags, + return aops->write_begin(file, mapping, pos, len, flags, pagep, fsdata); - } else { - int ret; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct inode *inode = mapping->host; - struct page *page; -again: - page = __grab_cache_page(mapping, index); - *pagep = page; - if (!page) - return -ENOMEM; - - if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) { - /* - * There is no way to resolve a short write situation - * for a !Uptodate page (except by double copying in - * the caller done by generic_perform_write_2copy). - * - * Instead, we have to bring it uptodate here. - */ - ret = aops->readpage(file, page); - page_cache_release(page); - if (ret) { - if (ret == AOP_TRUNCATED_PAGE) - goto again; - return ret; - } - goto again; - } - - ret = aops->prepare_write(file, page, offset, offset+len); - if (ret) { - unlock_page(page); - page_cache_release(page); - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); - } - return ret; - } } EXPORT_SYMBOL(pagecache_write_begin); @@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { const struct address_space_operations *aops = mapping->a_ops; - int ret; - - if (aops->write_end) { - mark_page_accessed(page); - ret = aops->write_end(file, mapping, pos, len, copied, - page, fsdata); - } else { - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct inode *inode = mapping->host; - - flush_dcache_page(page); - ret = aops->commit_write(file, page, offset, offset+len); - unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - - if (ret < 0) { - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); - } else if (ret > 0) - ret = min_t(size_t, copied, ret); - else - ret = copied; - } - return ret; + mark_page_accessed(page); + return aops->write_end(file, mapping, pos, len, copied, page, fsdata); } EXPORT_SYMBOL(pagecache_write_end); @@ -2226,174 +2163,6 @@ repeat: } EXPORT_SYMBOL(__grab_cache_page); -static ssize_t generic_perform_write_2copy(struct file *file, - struct iov_iter *i, loff_t pos) -{ - struct address_space *mapping = file->f_mapping; - const struct address_space_operations *a_ops = mapping->a_ops; - struct inode *inode = mapping->host; - long status = 0; - ssize_t written = 0; - - do { - struct page *src_page; - struct page *page; - pgoff_t index; /* Pagecache index for current page */ - unsigned long offset; /* Offset into pagecache page */ - unsigned long bytes; /* Bytes to write to page */ - size_t copied; /* Bytes copied from user */ - - offset = (pos & (PAGE_CACHE_SIZE - 1)); - index = pos >> PAGE_CACHE_SHIFT; - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, - iov_iter_count(i)); - - /* - * a non-NULL src_page indicates that we're doing the - * copy via get_user_pages and kmap. - */ - src_page = NULL; - - /* - * Bring in the user page that we will copy from _first_. - * Otherwise there's a nasty deadlock on copying from the - * same page as we're writing to, without it being marked - * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. - */ - if (unlikely(iov_iter_fault_in_readable(i, bytes))) { - status = -EFAULT; - break; - } - - page = __grab_cache_page(mapping, index); - if (!page) { - status = -ENOMEM; - break; - } - - /* - * non-uptodate pages cannot cope with short copies, and we - * cannot take a pagefault with the destination page locked. - * So pin the source page to copy it. - */ - if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) { - unlock_page(page); - - src_page = alloc_page(GFP_KERNEL); - if (!src_page) { - page_cache_release(page); - status = -ENOMEM; - break; - } - - /* - * Cannot get_user_pages with a page locked for the - * same reason as we can't take a page fault with a - * page locked (as explained below). - */ - copied = iov_iter_copy_from_user(src_page, i, - offset, bytes); - if (unlikely(copied == 0)) { - status = -EFAULT; - page_cache_release(page); - page_cache_release(src_page); - break; - } - bytes = copied; - - lock_page(page); - /* - * Can't handle the page going uptodate here, because - * that means we would use non-atomic usercopies, which - * zero out the tail of the page, which can cause - * zeroes to become transiently visible. We could just - * use a non-zeroing copy, but the APIs aren't too - * consistent. - */ - if (unlikely(!page->mapping || PageUptodate(page))) { - unlock_page(page); - page_cache_release(page); - page_cache_release(src_page); - continue; - } - } - - status = a_ops->prepare_write(file, page, offset, offset+bytes); - if (unlikely(status)) - goto fs_write_aop_error; - - if (!src_page) { - /* - * Must not enter the pagefault handler here, because - * we hold the page lock, so we might recursively - * deadlock on the same lock, or get an ABBA deadlock - * against a different lock, or against the mmap_sem - * (which nests outside the page lock). So increment - * preempt count, and use _atomic usercopies. - * - * The page is uptodate so we are OK to encounter a - * short copy: if unmodified parts of the page are - * marked dirty and written out to disk, it doesn't - * really matter. - */ - pagefault_disable(); - copied = iov_iter_copy_from_user_atomic(page, i, - offset, bytes); - pagefault_enable(); - } else { - void *src, *dst; - src = kmap_atomic(src_page, KM_USER0); - dst = kmap_atomic(page, KM_USER1); - memcpy(dst + offset, src + offset, bytes); - kunmap_atomic(dst, KM_USER1); - kunmap_atomic(src, KM_USER0); - copied = bytes; - } - flush_dcache_page(page); - - status = a_ops->commit_write(file, page, offset, offset+bytes); - if (unlikely(status < 0)) - goto fs_write_aop_error; - if (unlikely(status > 0)) /* filesystem did partial write */ - copied = min_t(size_t, copied, status); - - unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - if (src_page) - page_cache_release(src_page); - - iov_iter_advance(i, copied); - pos += copied; - written += copied; - - balance_dirty_pages_ratelimited(mapping); - cond_resched(); - continue; - -fs_write_aop_error: - unlock_page(page); - page_cache_release(page); - if (src_page) - page_cache_release(src_page); - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - */ - if (pos + bytes > inode->i_size) - vmtruncate(inode, inode->i_size); - break; - } while (iov_iter_count(i)); - - return written ? written : status; -} - static ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { @@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, struct iov_iter i; iov_iter_init(&i, iov, nr_segs, count, written); - if (a_ops->write_begin) - status = generic_perform_write(file, &i, pos); - else - status = generic_perform_write_2copy(file, &i, pos); + status = generic_perform_write(file, &i, pos); if (likely(status >= 0)) { written += status; -- cgit v1.2.3 From 7106a27b52940085c2c3f6e42742d3a2a84d872a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Oct 2008 14:01:15 -0700 Subject: kernel.h: fix might_sleep kernel-doc Put the kernel-doc for might_sleep() _immediately_ before the macro (no intervening lines). Otherwise kernel-doc complains like so: Warning(linux-2.6.27-rc3-git2//include/linux/kernel.h:129): No description found for parameter 'file' Warning(linux-2.6.27-rc3-git2//include/linux/kernel.h:129): No description found for parameter 'line' because kernel-doc is looking at the wrong function prototype (i.e., __might_sleep). [Yes, I have a todo note to myself to check/warn for that inconsistency in scripts/kernel-doc.] Signed-off-by: Randy Dunlap Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 396a350b87a..fba141d3ca0 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -116,6 +116,8 @@ extern int _cond_resched(void); # define might_resched() do { } while (0) #endif +#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP + void __might_sleep(char *file, int line); /** * might_sleep - annotation for functions that can sleep * @@ -126,8 +128,6 @@ extern int _cond_resched(void); * be bitten later when the calling function happens to sleep when it is not * supposed to. */ -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP - void __might_sleep(char *file, int line); # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) #else -- cgit v1.2.3 From 731572d39fcd3498702eda4600db4c43d51e0b26 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 29 Oct 2008 14:01:20 -0700 Subject: nfsd: fix vm overcommit crash Junjiro R. Okajima reported a problem where knfsd crashes if you are using it to export shmemfs objects and run strict overcommit. In this situation the current->mm based modifier to the overcommit goes through a NULL pointer. We could simply check for NULL and skip the modifier but we've caught other real bugs in the past from mm being NULL here - cases where we did need a valid mm set up (eg the exec bug about a year ago). To preserve the checks and get the logic we want shuffle the checking around and add a new helper to the vm_ security wrappers Also fix a current->mm reference in nommu that should use the passed mm [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix build] Reported-by: Junjiro R. Okajima Acked-by: James Morris Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/security.h | 6 ++++++ mm/mmap.c | 3 ++- mm/nommu.c | 3 ++- mm/shmem.c | 8 ++++---- security/security.c | 9 +++++++++ 5 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index f5c4a51eb42..c13f1cec9ab 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1585,6 +1585,7 @@ int security_syslog(int type); int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); +int security_vm_enough_memory_kern(long pages); int security_bprm_alloc(struct linux_binprm *bprm); void security_bprm_free(struct linux_binprm *bprm); void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); @@ -1820,6 +1821,11 @@ static inline int security_vm_enough_memory(long pages) return cap_vm_enough_memory(current->mm, pages); } +static inline int security_vm_enough_memory_kern(long pages) +{ + return cap_vm_enough_memory(current->mm, pages); +} + static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { return cap_vm_enough_memory(mm, pages); diff --git a/mm/mmap.c b/mm/mmap.c index 74f4d158022..de14ac21e5b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -175,7 +175,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) /* Don't let a single process grow too big: leave 3% of the size of this process for other processes */ - allowed -= mm->total_vm / 32; + if (mm) + allowed -= mm->total_vm / 32; /* * cast `allowed' as a signed long because vm_committed_space diff --git a/mm/nommu.c b/mm/nommu.c index 2696b24f2bb..7695dc85078 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1454,7 +1454,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) /* Don't let a single process grow too big: leave 3% of the size of this process for other processes */ - allowed -= current->mm->total_vm / 32; + if (mm) + allowed -= mm->total_vm / 32; /* * cast `allowed' as a signed long because vm_committed_space diff --git a/mm/shmem.c b/mm/shmem.c index d38d7e61fcd..0ed075215e5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -161,8 +161,8 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) */ static inline int shmem_acct_size(unsigned long flags, loff_t size) { - return (flags & VM_ACCOUNT)? - security_vm_enough_memory(VM_ACCT(size)): 0; + return (flags & VM_ACCOUNT) ? + security_vm_enough_memory_kern(VM_ACCT(size)) : 0; } static inline void shmem_unacct_size(unsigned long flags, loff_t size) @@ -179,8 +179,8 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size) */ static inline int shmem_acct_block(unsigned long flags) { - return (flags & VM_ACCOUNT)? - 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE)); + return (flags & VM_ACCOUNT) ? + 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)); } static inline void shmem_unacct_blocks(unsigned long flags, long pages) diff --git a/security/security.c b/security/security.c index 255b08559b2..c0acfa7177e 100644 --- a/security/security.c +++ b/security/security.c @@ -198,14 +198,23 @@ int security_settime(struct timespec *ts, struct timezone *tz) int security_vm_enough_memory(long pages) { + WARN_ON(current->mm == NULL); return security_ops->vm_enough_memory(current->mm, pages); } int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { + WARN_ON(mm == NULL); return security_ops->vm_enough_memory(mm, pages); } +int security_vm_enough_memory_kern(long pages) +{ + /* If current->mm is a kernel thread then we will pass NULL, + for this specific case that is fine */ + return security_ops->vm_enough_memory(current->mm, pages); +} + int security_bprm_alloc(struct linux_binprm *bprm) { return security_ops->bprm_alloc_security(bprm); -- cgit v1.2.3 From effdb9492de01a51f8123e62e87e3330688f9bf1 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Wed, 29 Oct 2008 14:01:21 -0700 Subject: spi: fix compile error Fix compile error below: LD drivers/spi/built-in.o CC [M] drivers/spi/spi_gpio.o In file included from drivers/spi/spi_gpio.c:26: include/linux/spi/spi_bitbang.h:23: error: field `work' has incomplete type make[2]: *** [drivers/spi/spi_gpio.o] Error 1 make[1]: *** [drivers/spi] Error 2 make: *** [drivers] Error 2 Signed-off-by: Fernando Luis Vazquez Cao Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi_bitbang.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index b8db32cea1d..bf8de281b4e 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -18,6 +18,9 @@ * duplex (MicroWire) controllers. Provide chipslect() and txrx_bufs(), * and custom setup()/cleanup() methods. */ + +#include + struct spi_bitbang { struct workqueue_struct *workqueue; struct work_struct work; -- cgit v1.2.3 From c132419e560a2ecd3c8cf77f9c37e103e74b3754 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Thu, 30 Oct 2008 18:17:06 -0700 Subject: gianfar: Fix race in TBI/SerDes configuration The init_phy() function attaches to the PHY, then configures the SerDes<->TBI link (in SGMII mode). The TBI is on the MDIO bus with the PHY (sort of) and is accessed via the gianfar's MDIO registers, using the functions gfar_local_mdio_read/write(), which don't do any locking. The previously attached PHY will start a work-queue on a timer, and probably an irq handler as well, which will talk to the PHY and thus use the MDIO bus. This uses phy_read/write(), which have locking, but not against the gfar_local_mdio versions. The result is that PHY code will try to use the MDIO bus at the same time as the SerDes setup code, corrupting the transfers. Setting up the SerDes before attaching to the PHY will insure that there is no race between the SerDes code and *our* PHY, but doesn't fix everything. Typically the PHYs for all gianfar devices are on the same MDIO bus, which is associated with the first gianfar device. This means that the first gianfar's SerDes code could corrupt the MDIO transfers for a different gianfar's PHY. The lock used by phy_read/write() is contained in the mii_bus structure, which is pointed to by the PHY. This is difficult to access from the gianfar drivers, as there is no link between a gianfar device and the mii_bus which shares the same MDIO registers. As far as the device layer and drivers are concerned they are two unrelated devices (which happen to share registers). Generally all gianfar devices' PHYs will be on the bus associated with the first gianfar. But this might not be the case, so simply locking the gianfar's PHY's mii bus might not lock the mii bus that the SerDes setup code is going to use. We solve this by having the code that creates the gianfar platform device look in the device tree for an mdio device that shares the gianfar's registers. If one is found the ID of its platform device is saved in the gianfar's platform data. A new function in the gianfar mii code, gfar_get_miibus(), can use the bus ID to search through the platform devices for a gianfar_mdio device with the right ID. The platform device's driver data is the mii_bus structure, which the SerDes setup code can use to lock the current bus. Signed-off-by: Trent Piepho CC: Andy Fleming Signed-off-by: Jeff Garzik --- arch/powerpc/sysdev/fsl_soc.c | 26 ++++++++++++++++++++++++++ drivers/net/gianfar.c | 7 +++++++ drivers/net/gianfar_mii.c | 21 +++++++++++++++++++++ drivers/net/gianfar_mii.h | 3 +++ include/linux/fsl_devices.h | 3 ++- 5 files changed, 59 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 01b884b2569..26ecb96f973 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -223,6 +223,8 @@ static int gfar_mdio_of_init_one(struct device_node *np) if (ret) return ret; + /* The gianfar device will try to use the same ID created below to find + * this bus, to coordinate register access (since they share). */ mdio_dev = platform_device_register_simple("fsl-gianfar_mdio", res.start&0xfffff, &res, 1); if (IS_ERR(mdio_dev)) @@ -394,6 +396,30 @@ static int __init gfar_of_init(void) of_node_put(mdio); } + /* Get MDIO bus controlled by this eTSEC, if any. Normally only + * eTSEC 1 will control an MDIO bus, not necessarily the same + * bus that its PHY is on ('mdio' above), so we can't just use + * that. What we do is look for a gianfar mdio device that has + * overlapping registers with this device. That's really the + * whole point, to find the device sharing our registers to + * coordinate access with it. + */ + for_each_compatible_node(mdio, NULL, "fsl,gianfar-mdio") { + if (of_address_to_resource(mdio, 0, &res)) + continue; + + if (res.start >= r[0].start && res.end <= r[0].end) { + /* Get the ID the mdio bus platform device was + * registered with. gfar_data.bus_id is + * different because it's for finding a PHY, + * while this is for finding a MII bus. + */ + gfar_data.mdio_bus = res.start&0xfffff; + of_node_put(mdio); + break; + } + } + ret = platform_device_add_data(gfar_dev, &gfar_data, sizeof(struct diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 64b201134fd..249541a1814 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -586,6 +586,10 @@ static void gfar_configure_serdes(struct net_device *dev) struct gfar_mii __iomem *regs = (void __iomem *)&priv->regs->gfar_mii_regs; int tbipa = gfar_read(&priv->regs->tbipa); + struct mii_bus *bus = gfar_get_miibus(priv); + + if (bus) + mutex_lock(&bus->mdio_lock); /* Single clk mode, mii mode off(for serdes communication) */ gfar_local_mdio_write(regs, tbipa, MII_TBICON, TBICON_CLK_SELECT); @@ -596,6 +600,9 @@ static void gfar_configure_serdes(struct net_device *dev) gfar_local_mdio_write(regs, tbipa, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000); + + if (bus) + mutex_unlock(&bus->mdio_lock); } static void init_registers(struct net_device *dev) diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c index bf73eea9801..0e2595d2493 100644 --- a/drivers/net/gianfar_mii.c +++ b/drivers/net/gianfar_mii.c @@ -269,6 +269,27 @@ static struct device_driver gianfar_mdio_driver = { .remove = gfar_mdio_remove, }; +static int match_mdio_bus(struct device *dev, void *data) +{ + const struct gfar_private *priv = data; + const struct platform_device *pdev = to_platform_device(dev); + + return !strcmp(pdev->name, gianfar_mdio_driver.name) && + pdev->id == priv->einfo->mdio_bus; +} + +/* Given a gfar_priv structure, find the mii_bus controlled by this device (not + * necessarily the same as the bus the gfar's PHY is on), if one exists. + * Normally only the first gianfar controls a mii_bus. */ +struct mii_bus *gfar_get_miibus(const struct gfar_private *priv) +{ + /*const*/ struct device *d; + + d = bus_find_device(gianfar_mdio_driver.bus, NULL, (void *)priv, + match_mdio_bus); + return d ? dev_get_drvdata(d) : NULL; +} + int __init gfar_mdio_init(void) { return driver_register(&gianfar_mdio_driver); diff --git a/drivers/net/gianfar_mii.h b/drivers/net/gianfar_mii.h index 2af28b16a0e..02dc970ca1f 100644 --- a/drivers/net/gianfar_mii.h +++ b/drivers/net/gianfar_mii.h @@ -18,6 +18,8 @@ #ifndef __GIANFAR_MII_H #define __GIANFAR_MII_H +struct gfar_private; /* forward ref */ + #define MIIMIND_BUSY 0x00000001 #define MIIMIND_NOTVALID 0x00000004 @@ -44,6 +46,7 @@ int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value); int gfar_local_mdio_write(struct gfar_mii __iomem *regs, int mii_id, int regnum, u16 value); int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum); +struct mii_bus *gfar_get_miibus(const struct gfar_private *priv); int __init gfar_mdio_init(void); void gfar_mdio_exit(void); #endif /* GIANFAR_PHY_H */ diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 4e625e0094c..708bab58d8d 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -49,7 +49,8 @@ struct gianfar_platform_data { u32 device_flags; /* board specific information */ u32 board_flags; - char bus_id[MII_BUS_ID_SIZE]; + int mdio_bus; /* Bus controlled by us */ + char bus_id[MII_BUS_ID_SIZE]; /* Bus PHY is on */ u32 phy_id; u8 mac_addr[6]; phy_interface_t interface; -- cgit v1.2.3 From ad1d967c88e349c7e822ad75dd3247a2a50d2ea3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 30 Oct 2008 23:54:35 -0700 Subject: net: delete excess kernel-doc notation Remove excess kernel-doc function parameters from networking header & driver files: Warning(include/net/sock.h:946): Excess function parameter or struct member 'sk' description in 'sk_filter_release' Warning(include/linux/netdevice.h:1545): Excess function parameter or struct member 'cpu' description in 'netif_tx_lock' Warning(drivers/net/wan/z85230.c:712): Excess function parameter or struct member 'regs' description in 'z8530_interrupt' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/wan/z85230.c | 1 - include/linux/netdevice.h | 1 - include/net/sock.h | 1 - 3 files changed, 3 deletions(-) (limited to 'include') diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c index ccd9cd35ecb..5bf7e01ef0e 100644 --- a/drivers/net/wan/z85230.c +++ b/drivers/net/wan/z85230.c @@ -695,7 +695,6 @@ EXPORT_SYMBOL(z8530_nop); * z8530_interrupt - Handle an interrupt from a Z8530 * @irq: Interrupt number * @dev_id: The Z8530 device that is interrupting. - * @regs: unused * * A Z85[2]30 device has stuck its hand in the air for attention. * We scan both the channels on the chip for events and then call diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c8bcb59adfd..9d77b1d7dca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1537,7 +1537,6 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) /** * netif_tx_lock - grab network device transmit lock * @dev: network device - * @cpu: cpu number of lock owner * * Get network device transmit lock */ diff --git a/include/net/sock.h b/include/net/sock.h index ada50c04d09..c04f9e18ea2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -936,7 +936,6 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); /** * sk_filter_release: Release a socket filter - * @sk: socket * @fp: filter to remove * * Remove a filter from a socket and release its resources. -- cgit v1.2.3 From 485ac57bc1238719b1508f91b0f9eeda4a3c84bb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 30 Oct 2008 23:55:16 -0700 Subject: netns: add register_pernet_gen_subsys/unregister_pernet_gen_subsys netns ops which are registered with register_pernet_gen_device() are shutdown strictly before those which are registered with register_pernet_subsys(). Sometimes this leads to opposite (read: buggy) shutdown ordering between two modules. Add register_pernet_gen_subsys()/unregister_pernet_gen_subsys() for modules which aren't elite enough for entry in struct net, and which can't use register_pernet_gen_device(). PPTP conntracking module is such one. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 ++ net/core/net_namespace.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 708009be88b..700c53a3c6f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -214,6 +214,8 @@ struct pernet_operations { extern int register_pernet_subsys(struct pernet_operations *); extern void unregister_pernet_subsys(struct pernet_operations *); +extern int register_pernet_gen_subsys(int *id, struct pernet_operations *); +extern void unregister_pernet_gen_subsys(int id, struct pernet_operations *); extern int register_pernet_device(struct pernet_operations *); extern void unregister_pernet_device(struct pernet_operations *); extern int register_pernet_gen_device(int *id, struct pernet_operations *); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f1d07b5c1e1..1895a4ca9c4 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -325,6 +325,38 @@ void unregister_pernet_subsys(struct pernet_operations *module) } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); +int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) +{ + int rv; + + mutex_lock(&net_mutex); +again: + rv = ida_get_new_above(&net_generic_ids, 1, id); + if (rv < 0) { + if (rv == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + goto out; + } + rv = register_pernet_operations(first_device, ops); + if (rv < 0) + ida_remove(&net_generic_ids, *id); + mutex_unlock(&net_mutex); +out: + return rv; +} +EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); + +void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(ops); + ida_remove(&net_generic_ids, id); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); + /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem -- cgit v1.2.3