author     Philippe Langlais <philippe.langlais@stericsson.com>	2012-06-04 19:46:24 +0800
committer  Philippe Langlais <philippe.langlais@stericsson.com>	2012-06-04 19:46:24 +0800
commit     29a3e1ef73daaa91cbf5cce2fb475f1878254402 (patch)
tree       fbd1df992706a4786633a7986a31b9cf78b3dced /kernel
parent     564119271649aa2d1840e79badf743e957b497e6 (diff)
parent     3c16bd7bb0a2b00b71f65bb0075e0c94d0fcc777 (diff)
Merge Linaro Android branch 'jstultz/linaro-android-3.4' into integration-android-ux500
Signed-off-by: Philippe Langlais <philippe.langlais@stericsson.com>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cgroup.c              | 214
-rw-r--r--   kernel/cpu.c                 |  20
-rw-r--r--   kernel/debug/debug_core.c    |  12
-rw-r--r--   kernel/debug/kdb/kdb_io.c    |  12
-rw-r--r--   kernel/fork.c                |  19
-rw-r--r--   kernel/irq/pm.c              |   7
-rw-r--r--   kernel/panic.c               |  13
-rw-r--r--   kernel/power/Kconfig         |  25
-rw-r--r--   kernel/power/Makefile        |   2
-rw-r--r--   kernel/power/main.c          |   9
-rw-r--r--   kernel/power/power.h         |  11
-rw-r--r--   kernel/power/suspend_time.c  | 111
-rw-r--r--   kernel/power/userwakelock.c  | 219
-rw-r--r--   kernel/printk.c              |  48
-rw-r--r--   kernel/sched/core.c          |  31
-rw-r--r--   kernel/sched/rt.c            |  15
-rw-r--r--   kernel/sysctl.c              |   8
-rw-r--r--   kernel/time/alarmtimer.c     |   4
18 files changed, 688 insertions, 92 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ed64ccac67c..2f0d7542c0d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -287,6 +287,33 @@ static void cgroup_release_agent(struct work_struct *work);
 static DECLARE_WORK(release_agent_work, cgroup_release_agent);
 static void check_for_release(struct cgroup *cgrp);
 
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
+ */
+static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
+{
+	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+		wake_up_all(&cgroup_rmdir_waitq);
+}
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+	css_get(css);
+}
+
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+	cgroup_wakeup_rmdir_waiter(css->cgroup);
+	css_put(css);
+}
+
 /* Link structure for associating css_set objects with cgroups */
 struct cg_cgroup_link {
 	/*
@@ -346,52 +373,43 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 	return &css_set_table[index];
 }
 
-/* We don't maintain the lists running through each css_set to its
- * task until after the first call to cgroup_iter_start(). This
- * reduces the fork()/exit() overhead for people who have cgroups
- * compiled into their kernel but not actually in use */
-static int use_task_css_set_links __read_mostly;
-
-static void __put_css_set(struct css_set *cg, int taskexit)
+static void free_css_set_work(struct work_struct *work)
 {
+	struct css_set *cg = container_of(work, struct css_set, work);
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
 
-	/*
-	 * Ensure that the refcount doesn't hit zero while any readers
-	 * can see it. Similar to atomic_dec_and_lock(), but for an
-	 * rwlock
-	 */
-	if (atomic_add_unless(&cg->refcount, -1, 1))
-		return;
-	write_lock(&css_set_lock);
-	if (!atomic_dec_and_test(&cg->refcount)) {
-		write_unlock(&css_set_lock);
-		return;
-	}
-
-	/* This css_set is dead. unlink it and release cgroup refcounts */
-	hlist_del(&cg->hlist);
-	css_set_count--;
+	write_lock(&css_set_lock);
 	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
 				 cg_link_list) {
 		struct cgroup *cgrp = link->cgrp;
 		list_del(&link->cg_link_list);
 		list_del(&link->cgrp_link_list);
-		if (atomic_dec_and_test(&cgrp->count) &&
-		    notify_on_release(cgrp)) {
-			if (taskexit)
-				set_bit(CGRP_RELEASABLE, &cgrp->flags);
+		if (atomic_dec_and_test(&cgrp->count)) {
 			check_for_release(cgrp);
+			cgroup_wakeup_rmdir_waiter(cgrp);
 		}
-
 		kfree(link);
 	}
-
 	write_unlock(&css_set_lock);
-	kfree_rcu(cg, rcu_head);
+
+	kfree(cg);
 }
 
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+
+	INIT_WORK(&cg->work, free_css_set_work);
+	schedule_work(&cg->work);
+}
+
+/* We don't maintain the lists running through each css_set to its
+ * task until after the first call to cgroup_iter_start(). This
+ * reduces the fork()/exit() overhead for people who have cgroups
+ * compiled into their kernel but not actually in use */
+static int use_task_css_set_links __read_mostly;
+
 /*
  * refcounted get/put for css_set objects
  */
@@ -400,14 +418,26 @@ static inline void get_css_set(struct css_set *cg)
 	atomic_inc(&cg->refcount);
 }
 
-static inline void put_css_set(struct css_set *cg)
+static void put_css_set(struct css_set *cg)
 {
-	__put_css_set(cg, 0);
-}
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
+	write_lock(&css_set_lock);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
+	}
 
-static inline void put_css_set_taskexit(struct css_set *cg)
-{
-	__put_css_set(cg, 1);
+	hlist_del(&cg->hlist);
+	css_set_count--;
+
+	write_unlock(&css_set_lock);
+	call_rcu(&cg->rcu_head, free_css_set_rcu);
 }
 
 /*
@@ -739,9 +769,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
  * another.  It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
- * task->cgroup without the expense of grabbing a system global
+ * task->cgroups without the expense of grabbing a system global
  * mutex. Therefore except as noted below, when dereferencing or, as
- * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task's cgroups pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
 *
@@ -931,33 +961,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 }
 
 /*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
-	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
-		wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
-	css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
-	cgroup_wakeup_rmdir_waiter(css->cgroup);
-	css_put(css);
-}
-
-/*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
  * returns an error, no reference counts are touched.
@@ -1889,6 +1892,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	struct cgroupfs_root *root = cgrp->root;
 	struct cgroup_taskset tset = { };
 	struct css_set *newcg;
+	struct css_set *cg;
 
 	/* @tsk either already exited or can't exit until the end */
 	if (tsk->flags & PF_EXITING)
@@ -1924,14 +1928,20 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		goto out;
 	}
 
+	task_lock(tsk);
+	cg = tsk->cgroups;
+	get_css_set(cg);
+	task_unlock(tsk);
+
 	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
 
 	for_each_subsys(root, ss) {
 		if (ss->attach)
 			ss->attach(cgrp, &tset);
 	}
-
-	synchronize_rcu();
+	set_bit(CGRP_RELEASABLE, &cgrp->flags);
+	/* put_css_set will not destroy cg until after an RCU grace period */
+	put_css_set(cg);
 
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@ -2132,6 +2142,24 @@ out_free_group_list:
 	return retval;
 }
 
+static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	struct cgroup_subsys *ss;
+	int ret;
+
+	for_each_subsys(cgrp->root, ss) {
+		if (ss->allow_attach) {
+			ret = ss->allow_attach(cgrp, tset);
+			if (ret)
+				return ret;
+		} else {
+			return -EACCES;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Find the task_struct of the task to attach by vpid and pass it along to the
  * function to attach either it or all tasks in its threadgroup. Will lock
@@ -2163,9 +2191,18 @@ retry_find_task:
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
-			rcu_read_unlock();
-			ret = -EACCES;
-			goto out_unlock_cgroup;
+			/*
+			 * if the default permission check fails, give each
+			 * cgroup a chance to extend the permission check
+			 */
+			struct cgroup_taskset tset = { };
+			tset.single.task = tsk;
+			tset.single.cgrp = cgrp;
+			ret = cgroup_allow_attach(cgrp, &tset);
+			if (ret) {
+				rcu_read_unlock();
+				goto out_unlock_cgroup;
+			}
 		}
 	} else
 		tsk = current;
@@ -3784,6 +3821,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	if (err < 0)
 		goto err_remove;
 
+	set_bit(CGRP_RELEASABLE, &parent->flags);
+
 	/* The cgroup directory was pre-locked for us */
 	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
 
@@ -3915,6 +3954,21 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
 	return !failed;
 }
 
+/* checks if all of the css_sets attached to a cgroup have a refcount of 0.
+ * Must be called with css_set_lock held */
+static int cgroup_css_sets_empty(struct cgroup *cgrp)
+{
+	struct cg_cgroup_link *link;
+
+	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
+		struct css_set *cg = link->cg;
+		if (atomic_read(&cg->refcount) > 0)
+			return 0;
+	}
+
+	return 1;
+}
+
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cgroup *cgrp = dentry->d_fsdata;
@@ -3927,7 +3981,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	/* the vfs holds both inode->i_mutex already */
 again:
 	mutex_lock(&cgroup_mutex);
-	if (atomic_read(&cgrp->count) != 0) {
+	if (!cgroup_css_sets_empty(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
@@ -3960,7 +4014,7 @@ again:
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
-	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
+	if (!cgroup_css_sets_empty(cgrp) || !list_empty(&cgrp->children)) {
 		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
@@ -4000,7 +4054,6 @@ again:
 	cgroup_d_remove_dir(d);
 	dput(d);
 
-	set_bit(CGRP_RELEASABLE, &parent->flags);
 	check_for_release(parent);
 
 	/*
@@ -4631,7 +4684,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	task_unlock(tsk);
 
 	if (cg)
-		put_css_set_taskexit(cg);
+		put_css_set(cg);
 }
 
 /**
@@ -4685,6 +4738,14 @@ static void check_for_release(struct cgroup *cgrp)
 }
 
 /* Caller must verify that the css is not for root cgroup */
+void __css_get(struct cgroup_subsys_state *css, int count)
+{
+	atomic_add(count, &css->refcnt);
+	set_bit(CGRP_RELEASABLE, &css->cgroup->flags);
+}
+EXPORT_SYMBOL_GPL(__css_get);
+
+/* Caller must verify that the css is not for root cgroup */
 void __css_put(struct cgroup_subsys_state *css, int count)
 {
 	struct cgroup *cgrp = css->cgroup;
@@ -4692,10 +4753,7 @@ void __css_put(struct cgroup_subsys_state *css, int count)
 	rcu_read_lock();
 	val = atomic_sub_return(count, &css->refcnt);
 	if (val == 1) {
-		if (notify_on_release(cgrp)) {
-			set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
-		}
+		check_for_release(cgrp);
 		cgroup_wakeup_rmdir_waiter(cgrp);
 	}
 	rcu_read_unlock();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2060c6e5702..fb4a5acc016 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -668,3 +668,23 @@ void init_cpu_online(const struct cpumask *src)
 {
 	cpumask_copy(to_cpumask(cpu_online_bits), src);
 }
+
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+	atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+
+void idle_notifier_call_chain(unsigned long val)
+{
+	atomic_notifier_call_chain(&idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_call_chain);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0557f24c6bc..35b94acec62 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -85,6 +85,10 @@ static int kgdb_use_con;
 bool dbg_is_early = true;
 /* Next cpu to become the master debug core */
 int dbg_switch_cpu;
+/* Flag for entering kdb when a panic occurs */
+static bool break_on_panic = true;
+/* Flag for entering kdb when an exception occurs */
+static bool break_on_exception = true;
 
 /* Use kdb or gdbserver mode */
 int dbg_kdb_mode = 1;
@@ -99,6 +103,8 @@ early_param("kgdbcon", opt_kgdb_con);
 module_param(kgdb_use_con, int, 0644);
 module_param(kgdbreboot, int, 0644);
+module_param(break_on_panic, bool, 0644);
+module_param(break_on_exception, bool, 0644);
 
 /*
  * Holds information about breakpoints in a kernel. These breakpoints are
@@ -673,6 +679,9 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
 	struct kgdb_state kgdb_var;
 	struct kgdb_state *ks = &kgdb_var;
 
+	if (unlikely(signo != SIGTRAP && !break_on_exception))
+		return 1;
+
 	ks->cpu			= raw_smp_processor_id();
 	ks->ex_vector		= evector;
 	ks->signo		= signo;
@@ -759,6 +768,9 @@ static int kgdb_panic_event(struct notifier_block *self,
 			    unsigned long val,
 			    void *data)
 {
+	if (!break_on_panic)
+		return NOTIFY_DONE;
+
 	if (dbg_kdb_mode)
 		kdb_printf("PANIC: %s\n", (char *)data);
 	kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index bb9520f0f6f..18a4cb33c52 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize)
 	int i;
 	int diag, dtab_count;
 	int key;
-
+	static int last_crlf;
 
 	diag = kdbgetintenv("DTABCOUNT", &dtab_count);
 	if (diag)
@@ -237,6 +237,9 @@ poll_again:
 		return buffer;
 	if (key != 9)
 		tab = 0;
+	if (key != 10 && key != 13)
+		last_crlf = 0;
+
 	switch (key) {
 	case 8: /* backspace */
 		if (cp > buffer) {
@@ -254,7 +257,12 @@ poll_again:
 			*cp = tmp;
 		}
 		break;
-	case 13: /* enter */
+	case 10: /* new line */
+	case 13: /* carriage return */
+		/* handle \n after \r */
+		if (last_crlf && last_crlf != key)
+			break;
+		last_crlf = key;
 		*lastchar++ = '\n';
 		*lastchar++ = '\0';
 		if (!KDB_STATE(KGDB_TRANS)) {
diff --git a/kernel/fork.c b/kernel/fork.c
index 687a15d5624..c0bf8c7002a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -158,6 +158,9 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+/* Notifier list called when a task struct is freed */
+static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
+
 static void account_kernel_stack(struct thread_info *ti, int account)
 {
 	struct zone *zone = page_zone(virt_to_page(ti));
@@ -188,6 +191,18 @@ static inline void put_signal_struct(struct signal_struct *sig)
 		free_signal_struct(sig);
 }
 
+int task_free_register(struct notifier_block *n)
+{
+	return atomic_notifier_chain_register(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_register);
+
+int task_free_unregister(struct notifier_block *n)
+{
+	return atomic_notifier_chain_unregister(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_unregister);
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
@@ -199,6 +214,7 @@ void __put_task_struct(struct task_struct *tsk)
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
 
+	atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
@@ -676,7 +692,8 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 
 	mm = get_task_mm(task);
 	if (mm && mm != current->mm &&
-			!ptrace_may_access(task, mode)) {
+			!ptrace_may_access(task, mode) &&
+			!capable(CAP_SYS_RESOURCE)) {
 		mmput(mm);
 		mm = ERR_PTR(-EACCES);
 	}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 15e53b1766a..fe4b09cf829 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -104,8 +104,13 @@ int check_wakeup_irqs(void)
 
 	for_each_irq_desc(irq, desc) {
 		if (irqd_is_wakeup_set(&desc->irq_data)) {
-			if (desc->istate & IRQS_PENDING)
+			if (desc->istate & IRQS_PENDING) {
+				pr_info("Wakeup IRQ %d %s pending, suspend aborted\n",
+					irq,
+					desc->action && desc->action->name ?
+					desc->action->name : "");
 				return -EBUSY;
+			}
 			continue;
 		}
 		/*
diff --git a/kernel/panic.c b/kernel/panic.c
index 4d53e8b9830..c4093d317bd 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -27,13 +27,19 @@
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
 
+/* Machine specific panic information string */
+char *mach_panic_string;
+
 int panic_on_oops;
 static unsigned long tainted_mask;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 
-int panic_timeout;
+#ifndef CONFIG_PANIC_TIMEOUT
+#define CONFIG_PANIC_TIMEOUT 0
+#endif
+int panic_timeout = CONFIG_PANIC_TIMEOUT;
 EXPORT_SYMBOL_GPL(panic_timeout);
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -375,6 +381,11 @@ late_initcall(init_oops_id);
 void print_oops_end_marker(void)
 {
 	init_oops_id();
+
+	if (mach_panic_string)
+		printk(KERN_WARNING "Board Information: %s\n",
+		       mach_panic_string);
+
 	printk(KERN_WARNING "---[ end trace %016llx ]---\n",
 	       (unsigned long long)oops_id);
 }
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index deb5461e321..63cad888e24 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -18,6 +18,24 @@ config SUSPEND_FREEZER
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
+config HAS_WAKELOCK
+	bool
+	default n
+
+config WAKELOCK
+	bool
+	default n
+
+config USER_WAKELOCK
+	bool "Userspace wake locks"
+	depends on PM_SLEEP
+	default n
+	---help---
+	  User-space wake lock api. Write "lockname" or "lockname timeout"
+	  to /sys/power/wake_lock lock and if needed create a wake lock.
+	  Write "lockname" to /sys/power/wake_unlock to unlock a user wake
+	  lock.
+
 config HIBERNATE_CALLBACKS
 	bool
 
@@ -243,3 +261,10 @@ config PM_GENERIC_DOMAINS_RUNTIME
 config CPU_PM
 	bool
 	depends on SUSPEND || CPU_IDLE
+
+config SUSPEND_TIME
+	bool "Log time spent in suspend"
+	---help---
+	  Prints the time spent in suspend in the kernel log, and
+	  keeps statistics on the time spent in suspend in
+	  /sys/kernel/debug/suspend_time
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 66d808ec525..7f51f84d530 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -9,5 +9,7 @@ obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate.o snapshot.o swap.o user.o \
 				   block_io.o
+obj-$(CONFIG_USER_WAKELOCK)	+= userwakelock.o
+obj-$(CONFIG_SUSPEND_TIME)	+= suspend_time.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 1c12581f1c6..7f3c91a634c 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -400,6 +400,11 @@ power_attr(pm_trace_dev_match);
 
 #endif /* CONFIG_PM_TRACE */
 
+#ifdef CONFIG_USER_WAKELOCK
+power_attr(wake_lock);
+power_attr(wake_unlock);
+#endif
+
 static struct attribute * g[] = {
 	&state_attr.attr,
 #ifdef CONFIG_PM_TRACE
@@ -412,6 +417,10 @@ static struct attribute * g[] = {
 #ifdef CONFIG_PM_DEBUG
 	&pm_test_attr.attr,
 #endif
+#ifdef CONFIG_USER_WAKELOCK
+	&wake_lock_attr.attr,
+	&wake_unlock_attr.attr,
+#endif
 #endif
 	NULL,
 };
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 98f3622d740..c4ecb81cb4f 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -264,3 +264,14 @@ static inline void suspend_thaw_processes(void)
 {
 }
 #endif
+
+#ifdef CONFIG_USER_WAKELOCK
+ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf);
+ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t n);
+ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf);
+ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t n);
+#endif
diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c
new file mode 100644
index 00000000000..d2a65da9f22
--- /dev/null
+++ b/kernel/power/suspend_time.c
@@ -0,0 +1,111 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/syscore_ops.h>
+#include <linux/time.h>
+
+static struct timespec suspend_time_before;
+static unsigned int time_in_suspend_bins[32];
+
+#ifdef CONFIG_DEBUG_FS
+static int suspend_time_debug_show(struct seq_file *s, void *data)
+{
+	int bin;
+	seq_printf(s, "time (secs)  count\n");
+	seq_printf(s, "------------------\n");
+	for (bin = 0; bin < 32; bin++) {
+		if (time_in_suspend_bins[bin] == 0)
+			continue;
+		seq_printf(s, "%4d - %4d %4u\n",
+			bin ? 1 << (bin - 1) : 0, 1 << bin,
+				time_in_suspend_bins[bin]);
+	}
+	return 0;
+}
+
+static int suspend_time_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, suspend_time_debug_show, NULL);
+}
+
+static const struct file_operations suspend_time_debug_fops = {
+	.open		= suspend_time_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init suspend_time_debug_init(void)
+{
+	struct dentry *d;
+
+	d = debugfs_create_file("suspend_time", 0755, NULL, NULL,
+		&suspend_time_debug_fops);
+	if (!d) {
+		pr_err("Failed to create suspend_time debug file\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+late_initcall(suspend_time_debug_init);
+#endif
+
+static int suspend_time_syscore_suspend(void)
+{
+	read_persistent_clock(&suspend_time_before);
+
+	return 0;
+}
+
+static void suspend_time_syscore_resume(void)
+{
+	struct timespec after;
+
+	read_persistent_clock(&after);
+
+	after = timespec_sub(after, suspend_time_before);
+
+	time_in_suspend_bins[fls(after.tv_sec)]++;
+
+	pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec,
+		after.tv_nsec / NSEC_PER_MSEC);
+}
+
+static struct syscore_ops suspend_time_syscore_ops = {
+	.suspend = suspend_time_syscore_suspend,
+	.resume = suspend_time_syscore_resume,
+};
+
+static int suspend_time_syscore_init(void)
+{
+	register_syscore_ops(&suspend_time_syscore_ops);
+
+	return 0;
+}
+
+static void suspend_time_syscore_exit(void)
+{
+	unregister_syscore_ops(&suspend_time_syscore_ops);
+}
+module_init(suspend_time_syscore_init);
+module_exit(suspend_time_syscore_exit);
diff --git a/kernel/power/userwakelock.c b/kernel/power/userwakelock.c
new file mode 100644
index 00000000000..a28a8db4146
--- /dev/null
+++ b/kernel/power/userwakelock.c
@@ -0,0 +1,219 @@
+/* kernel/power/userwakelock.c
+ *
+ * Copyright (C) 2005-2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/wakelock.h>
+#include <linux/slab.h>
+
+#include "power.h"
+
+enum {
+	DEBUG_FAILURE	= BIT(0),
+	DEBUG_ERROR	= BIT(1),
+	DEBUG_NEW	= BIT(2),
+	DEBUG_ACCESS	= BIT(3),
+	DEBUG_LOOKUP	= BIT(4),
+};
+static int debug_mask = DEBUG_FAILURE;
+module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP);
+
+static DEFINE_MUTEX(tree_lock);
+
+struct user_wake_lock {
+	struct rb_node		node;
+	struct wake_lock	wake_lock;
+	char			name[0];
+};
+struct rb_root user_wake_locks;
+
+static struct user_wake_lock *lookup_wake_lock_name(
+	const char *buf, int allocate, long *timeoutptr)
+{
+	struct rb_node **p = &user_wake_locks.rb_node;
+	struct rb_node *parent = NULL;
+	struct user_wake_lock *l;
+	int diff;
+	u64 timeout;
+	int name_len;
+	const char *arg;
+
+	/* Find length of lock name and start of optional timeout string */
+	arg = buf;
+	while (*arg && !isspace(*arg))
+		arg++;
+	name_len = arg - buf;
+	if (!name_len)
+		goto bad_arg;
+	while (isspace(*arg))
+		arg++;
+
+	/* Process timeout string */
+	if (timeoutptr && *arg) {
+		timeout = simple_strtoull(arg, (char **)&arg, 0);
+		while (isspace(*arg))
+			arg++;
+		if (*arg)
+			goto bad_arg;
+		/* convert timeout from nanoseconds to jiffies > 0 */
+		timeout += (NSEC_PER_SEC / HZ) - 1;
+		do_div(timeout, (NSEC_PER_SEC / HZ));
+		if (timeout <= 0)
+			timeout = 1;
+		*timeoutptr = timeout;
+	} else if (*arg)
+		goto bad_arg;
+	else if (timeoutptr)
+		*timeoutptr = 0;
+
+	/* Lookup wake lock in rbtree */
+	while (*p) {
+		parent = *p;
+		l = rb_entry(parent, struct user_wake_lock, node);
+		diff = strncmp(buf, l->name, name_len);
+		if (!diff && l->name[name_len])
+			diff = -1;
+		if (debug_mask & DEBUG_ERROR)
+			pr_info("lookup_wake_lock_name: compare %.*s %s %d\n",
+				name_len, buf, l->name, diff);
+
+		if (diff < 0)
+			p = &(*p)->rb_left;
+		else if (diff > 0)
+			p = &(*p)->rb_right;
+		else
+			return l;
+	}
+
+	/* Allocate and add new wakelock to rbtree */
+	if (!allocate) {
+		if (debug_mask & DEBUG_ERROR)
+			pr_info("lookup_wake_lock_name: %.*s not found\n",
+				name_len, buf);
+		return ERR_PTR(-EINVAL);
+	}
+	l = kzalloc(sizeof(*l) + name_len + 1, GFP_KERNEL);
+	if (l == NULL) {
+		if (debug_mask & DEBUG_FAILURE)
+			pr_err("lookup_wake_lock_name: failed to allocate "
+				"memory for %.*s\n", name_len, buf);
+		return ERR_PTR(-ENOMEM);
+	}
+	memcpy(l->name, buf, name_len);
+	if (debug_mask & DEBUG_NEW)
+		pr_info("lookup_wake_lock_name: new wake lock %s\n", l->name);
+	wake_lock_init(&l->wake_lock, WAKE_LOCK_SUSPEND, l->name);
+	rb_link_node(&l->node, parent, p);
+	rb_insert_color(&l->node, &user_wake_locks);
+	return l;
+
+bad_arg:
+	if (debug_mask & DEBUG_ERROR)
+		pr_info("lookup_wake_lock_name: wake lock, %.*s, bad arg, %s\n",
+			name_len, buf, arg);
+	return ERR_PTR(-EINVAL);
+}
+
+ssize_t wake_lock_show(
+	struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	char *s = buf;
+	char *end = buf + PAGE_SIZE;
+	struct rb_node *n;
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+
+	for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) {
+		l = rb_entry(n, struct user_wake_lock, node);
+		if (wake_lock_active(&l->wake_lock))
+			s += scnprintf(s, end - s, "%s ", l->name);
+	}
+	s += scnprintf(s, end - s, "\n");
+
+	mutex_unlock(&tree_lock);
+	return (s - buf);
+}
+
+ssize_t wake_lock_store(
+	struct kobject *kobj, struct kobj_attribute *attr,
+	const char *buf, size_t n)
+{
+	long timeout;
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+	l = lookup_wake_lock_name(buf, 1, &timeout);
+	if (IS_ERR(l)) {
+		n = PTR_ERR(l);
+		goto bad_name;
+	}
+
+	if (debug_mask & DEBUG_ACCESS)
+		pr_info("wake_lock_store: %s, timeout %ld\n", l->name, timeout);
+
+	if (timeout)
+		wake_lock_timeout(&l->wake_lock, timeout);
+	else
+		wake_lock(&l->wake_lock);
+bad_name:
+	mutex_unlock(&tree_lock);
+	return n;
+}
+
+
+ssize_t wake_unlock_show(
+	struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	char *s = buf;
+	char *end = buf + PAGE_SIZE;
+	struct rb_node *n;
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+
+	for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) {
+		l = rb_entry(n, struct user_wake_lock, node);
+		if (!wake_lock_active(&l->wake_lock))
+			s += scnprintf(s, end - s, "%s ", l->name);
+	}
+	s += scnprintf(s, end - s, "\n");
+
+	mutex_unlock(&tree_lock);
+	return (s - buf);
+}
+
+ssize_t wake_unlock_store(
+	struct kobject *kobj, struct kobj_attribute *attr,
+	const char *buf, size_t n)
+{
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+	l = lookup_wake_lock_name(buf, 0, NULL);
+	if (IS_ERR(l)) {
+		n = PTR_ERR(l);
+		goto not_found;
+	}
+
+	if (debug_mask & DEBUG_ACCESS)
+		pr_info("wake_unlock_store: %s\n", l->name);
+
+	wake_unlock(&l->wake_lock);
+not_found:
+	mutex_unlock(&tree_lock);
+	return n;
+}
+
diff --git a/kernel/printk.c b/kernel/printk.c
index c538c187618..cce10a33c8f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -298,6 +298,53 @@ static inline void boot_delay_msec(void)
 }
 #endif
 
+/*
+ * Return the number of unread characters in the log buffer.
+ */
+static int log_buf_get_len(void)
+{
+	return logged_chars;
+}
+
+/*
+ * Clears the ring-buffer
+ */
+void log_buf_clear(void)
+{
+	logged_chars = 0;
+}
+
+/*
+ * Copy a range of characters from the log buffer.
+ */
+int log_buf_copy(char *dest, int idx, int len)
+{
+	int ret, max;
+	bool took_lock = false;
+
+	if (!oops_in_progress) {
+		raw_spin_lock_irq(&logbuf_lock);
+		took_lock = true;
+	}
+
+	max = log_buf_get_len();
+	if (idx < 0 || idx >= max) {
+		ret = -1;
+	} else {
+		if (len > max - idx)
+			len = max - idx;
+		ret = len;
+		idx += (log_end - max);
+		while (len-- > 0)
+			dest[len] = LOG_BUF(idx + len);
+	}
+
+	if (took_lock)
+		raw_spin_unlock_irq(&logbuf_lock);
+
+	return ret;
+}
+
 #ifdef CONFIG_SECURITY_DMESG_RESTRICT
 int dmesg_restrict = 1;
 #else
@@ -1173,7 +1220,6 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DEAD:
-	case CPU_DYING:
 	case CPU_DOWN_FAILED:
 	case CPU_UP_CANCELED:
 		console_lock();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e5212ae294f..9d35dfeda3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7075,13 +7075,24 @@ static inline int preempt_count_equals(int preempt_offset)
 	return (nested == preempt_offset);
 }
 
+static int __might_sleep_init_called;
+int __init __might_sleep_init(void)
+{
+	__might_sleep_init_called = 1;
+	return 0;
+}
+early_initcall(__might_sleep_init);
+
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
 	static unsigned long prev_jiffy;	/* ratelimiting */
 
 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
-	    system_state != SYSTEM_RUNNING || oops_in_progress)
+	    oops_in_progress)
+		return;
+	if (system_state != SYSTEM_RUNNING &&
+	    (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
 		return;
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 		return;
@@ -7634,6 +7645,23 @@ static void cpu_cgroup_destroy(struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }
 
+static int
+cpu_cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	const struct cred *cred = current_cred(), *tcred;
+	struct task_struct *task;
+
+	cgroup_taskset_for_each(task, cgrp, tset) {
+		tcred = __task_cred(task);
+
+		if ((current != task) && !capable(CAP_SYS_NICE) &&
+		    cred->euid != tcred->uid && cred->euid != tcred->suid)
+			return -EACCES;
+	}
+
+	return 0;
+}
+
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -7995,6 +8023,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.destroy	= cpu_cgroup_destroy,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
+	.allow_attach	= cpu_cgroup_allow_attach,
 	.exit		= cpu_cgroup_exit,
 	.populate	= cpu_cgroup_populate,
 	.subsys_id	= cpu_cgroup_subsys_id,
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 44af55e6d5d..8f32475d0ee 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1983,6 +1983,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 {
+	struct sched_rt_entity *rt_se = &p->rt;
+
 	update_curr_rt(rq);
 
 	watchdog(rq, p);
@@ -2000,12 +2002,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	p->rt.time_slice = RR_TIMESLICE;
 
 	/*
-	 * Requeue to the end of queue if we are not the only element
-	 * on the queue:
+	 * Requeue to the end of queue if we (and all of our ancestors) are the
+	 * only element on the queue
 	 */
-	if (p->rt.run_list.prev != p->rt.run_list.next) {
-		requeue_task_rt(rq, p, 0);
-		set_tsk_need_resched(p);
+	for_each_sched_rt_entity(rt_se) {
+		if (rt_se->run_list.prev != rt_se->run_list.next) {
+			requeue_task_rt(rq, p, 0);
+			set_tsk_need_resched(p);
+			return;
+		}
 	}
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4ab11879aeb..49f47258272 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -102,6 +102,7 @@ extern char core_pattern[];
 extern unsigned int core_pipe_limit;
 extern int pid_max;
 extern int min_free_kbytes;
+extern int min_free_order_shift;
 extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
@@ -1199,6 +1200,13 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 	},
 	{
+		.procname	= "min_free_order_shift",
+		.data		= &min_free_order_shift,
+		.maxlen		= sizeof(min_free_order_shift),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
 		.procname	= "percpu_pagelist_fraction",
 		.data		= &percpu_pagelist_fraction,
 		.maxlen		= sizeof(percpu_pagelist_fraction),
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 8a538c55fc7..aa27d391bfc 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(rtcdev_lock);
  * If one has not already been chosen, it checks to see if a
  * functional rtc device is available.
  */
-static struct rtc_device *alarmtimer_get_rtcdev(void)
+struct rtc_device *alarmtimer_get_rtcdev(void)
 {
 	unsigned long flags;
 	struct rtc_device *ret;
@@ -115,7 +115,7 @@ static void alarmtimer_rtc_interface_remove(void)
 	class_interface_unregister(&alarmtimer_rtc_interface);
 }
 #else
-static inline struct rtc_device *alarmtimer_get_rtcdev(void)
+struct rtc_device *alarmtimer_get_rtcdev(void)
 {
 	return NULL;
 }
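
Usage sketch (illustrative, not part of the commit): per the USER_WAKELOCK Kconfig help and the wake_lock_store()/wake_unlock_store() handlers above, userspace takes a wake lock by writing "lockname", optionally followed by a timeout, to /sys/power/wake_lock, and drops it by writing "lockname" to /sys/power/wake_unlock; the timeout is parsed in nanoseconds (see the nanoseconds-to-jiffies conversion in lookup_wake_lock_name()). A minimal userspace C sketch under those assumptions — the lock name "example_lock" is arbitrary, and the files only exist on kernels built with CONFIG_USER_WAKELOCK=y:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int sysfs_write(const char *path, const char *value)
	{
		int fd = open(path, O_WRONLY);
		if (fd < 0)
			return -1;
		/* one write() per request: wake_lock_store() parses the whole buffer */
		ssize_t n = write(fd, value, strlen(value));
		close(fd);
		return n < 0 ? -1 : 0;
	}

	int main(void)
	{
		/* hold "example_lock" for at most 5 seconds (timeout is in ns) */
		if (sysfs_write("/sys/power/wake_lock", "example_lock 5000000000"))
			perror("wake_lock");

		/* ... work that must complete before the system suspends ... */

		if (sysfs_write("/sys/power/wake_unlock", "example_lock"))
			perror("wake_unlock");
		return 0;
	}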
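Likewise illustrative: the task_free_notifier added to kernel/fork.c is driven from __put_task_struct(), which invokes the chain with the dying task_struct as the data argument, and is consumed through task_free_register()/task_free_unregister(). A hypothetical in-kernel client sketch, assuming only the symbols exported by this patch (the extern declarations stand in for whatever header a real tree would provide, and the callback must not sleep, since the chain is atomic):

	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <linux/sched.h>

	/* exported from kernel/fork.c by this patch; a real tree would
	 * declare these in a shared header */
	extern int task_free_register(struct notifier_block *n);
	extern int task_free_unregister(struct notifier_block *n);

	static int example_task_free(struct notifier_block *nb,
				     unsigned long action, void *data)
	{
		/* the chain passes the task_struct being freed as data */
		struct task_struct *tsk = data;

		pr_debug("task %d (%s) freed\n", tsk->pid, tsk->comm);
		return NOTIFY_OK;
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_task_free,
	};

	static int __init example_init(void)
	{
		return task_free_register(&example_nb);
	}

	static void __exit example_exit(void)
	{
		task_free_unregister(&example_nb);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");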