Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/include/asm/pgtable.h            |   1
-rw-r--r-- | arch/arm/include/asm/processor.h          |   2
-rw-r--r-- | arch/arm/include/asm/topology.h           |  35
-rw-r--r-- | arch/arm/kernel/topology.c                | 485
-rw-r--r-- | arch/arm/mach-exynos4/platsmp.c           |  10
-rw-r--r-- | arch/arm/mach-msm/platsmp.c               |   6
-rw-r--r-- | arch/arm/mach-omap2/omap-smp.c            |  10
-rw-r--r-- | arch/arm/mach-pxa/cm-x300.c               |   8
-rw-r--r-- | arch/arm/mach-realview/platsmp.c          |  10
-rw-r--r-- | arch/arm/mach-shmobile/platsmp.c          |   6
-rw-r--r-- | arch/arm/mach-tegra/platsmp.c             |   8
-rw-r--r-- | arch/arm/mach-ux500/cpu.c                 |  25
-rw-r--r-- | arch/arm/mach-ux500/platsmp.c             |  10
-rw-r--r-- | arch/arm/mach-vexpress/ct-ca9x4.c         |   6
-rw-r--r-- | arch/arm/mm/mmap.c                        | 196
-rw-r--r-- | arch/arm/plat-mxc/include/mach/iomux-v3.h |  10
16 files changed, 744 insertions, 84 deletions
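Most of the added lines implement a per-cpu scheduling power that tracks cpufreq: on each frequency transition, set_cpufreq_scale() in arch/arm/kernel/topology.c divides the new frequency by the table's step size, clamps the index to the table bounds, and publishes the table entry through the per-cpu cpu_scale that arch_scale_freq_power() hands back to the scheduler. Below is a minimal user-space sketch of just that lookup, using the CA9 step and table values from the patch; the kernel version operates on per-cpu data from a cpufreq transition notifier, and the function name cpu_power_of() is ours.

#include <stdio.h>

/* Values from the CA9 table in this patch: cpufreq reports kHz, the
 * table uses a 200 MHz step, and low frequencies are rated 4096 so the
 * scheduler gathers tasks there in power-save mode. */
#define CPU_TOPO_FREQ_STEP	200000
#define CPU_MAX_FREQ		10

static const unsigned int table_ca9_power[CPU_MAX_FREQ] = {
	4096, 4096, 4096, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
};

/* Mirrors the index computation in set_cpufreq_scale(): bucket the
 * frequency by step, clamping to the last table entry. */
static unsigned int cpu_power_of(unsigned int freq_khz)
{
	unsigned int idx = freq_khz / CPU_TOPO_FREQ_STEP;

	if (idx >= CPU_MAX_FREQ)
		idx = CPU_MAX_FREQ - 1;
	return table_ca9_power[idx];
}

int main(void)
{
	printf("400 MHz -> %u\n", cpu_power_of(400000));   /* 4096 */
	printf("1 GHz   -> %u\n", cpu_power_of(1000000));  /* 1024 */
	printf("3 GHz   -> %u\n", cpu_power_of(3000000));  /* clamped: 1024 */
	return 0;
}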
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index aec18abf4e6..9d5255ae784 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -305,6 +305,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * We provide our own arch_get_unmapped_area to cope with VIPT caches. */ #define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN /* * remap a physical page `pfn' of size `size' with page protection `prot' diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index b2d9df5667a..ce280b8d613 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -123,6 +123,8 @@ static inline void prefetch(const void *ptr) #endif +#define HAVE_ARCH_PICK_MMAP_LAYOUT + #endif #endif /* __ASM_ARM_PROCESSOR_H */ diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index a7e457ed27c..f7f02e392ef 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -25,7 +25,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); -const struct cpumask *cpu_coregroup_mask(unsigned int cpu); +const struct cpumask *cpu_coregroup_mask(int cpu); #else @@ -34,6 +34,39 @@ static inline void store_cpu_topology(unsigned int cpuid) { } #endif +/* Common values for CPUs */ +#ifndef SD_CPU_INIT +#define SD_CPU_INIT (struct sched_domain) { \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ + \ + .flags = 1*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_BALANCE_WAKE \ + | 1*SD_WAKE_AFFINE \ + | 0*SD_PREFER_LOCAL \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + | arch_sd_sibling_asym_packing() \ + | sd_balance_for_package_power() \ + | sd_power_saving_flags() \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ +} +#endif + #include <asm-generic/topology.h> #endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 1040c00405d..053ce9cbc0b 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -18,6 +18,17 @@ #include <linux/node.h> #include <linux/nodemask.h> #include <linux/sched.h> +#include <linux/cpumask.h> +#include <linux/cpuset.h> + +#ifdef CONFIG_CPU_FREQ +#include <linux/cpufreq.h> +#endif + +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#include <linux/uaccess.h> /* for copy_from_user */ +#endif #include <asm/cputype.h> #include <asm/topology.h> @@ -43,12 +54,327 @@ struct cputopo_arm cpu_topology[NR_CPUS]; -const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +/* + * cpu power scale management + */ + +/* + * a per cpu data structure should be better because each cpu is mainly + * using its own cpu_power even it's not always true because of + * no_hz_idle_balance + */ + +static DEFINE_PER_CPU(unsigned int, cpu_scale); + +/* + * cpu topology mask management + */ + +unsigned int advanced_topology = 1; + +static void normal_cpu_topology_mask(void); +static void (*set_cpu_topology_mask)(void) = normal_cpu_topology_mask; + +#ifdef CONFIG_CPU_FREQ +/* + * This struct describes parameters to compute cpu_power + */ +struct cputopo_power { + int id; + int max; /* max idx in the table */ + unsigned int step; /* frequency step 
for the table */ + unsigned int *table; /* table of cpu_power */ +}; + +/* default table with one default cpu_power value */ +unsigned int table_default_power[1] = { + 1024 +}; + +static struct cputopo_power default_cpu_power = { + .max = 1, + .step = 1, + .table = table_default_power, +}; + +/* CA-9 table with cpufreq modifying cpu_power */ +#define CPU_MAX_FREQ 10 +/* we use a 200Mhz step for scaling cpu power */ +#define CPU_TOPO_FREQ_STEP 200000 +/* This table sets the cpu_power scale of a cpu according to 2 inputs which are + * the frequency and the sched_mc mode. The content of this table could be SoC + * specific so we should add a method to overwrite this default table. + * TODO: Study how to use DT for setting this table + */ +unsigned int table_ca9_power[CPU_MAX_FREQ] = { +/* freq< 200 400 600 800 1000 1200 1400 1600 1800 other*/ + 4096, 4096, 4096, 1024, 1024, 1024, 1024, 1024, 1024, 1024, /* Power save mode CA9 MP */ +}; + +static struct cputopo_power CA9_cpu_power = { + .max = CPU_MAX_FREQ, + .step = CPU_TOPO_FREQ_STEP, + .table = table_ca9_power, +}; + +#define ARM_CORTEX_A9_DEFAULT_SCALE 0 +#define ARM_CORTEX_A9_POWER_SCALE 1 +/* This table list all possible cpu power configuration */ +struct cputopo_power *table_config[2] = { + &default_cpu_power, + &CA9_cpu_power, +}; + +struct cputopo_scale { + int id; + int freq; + struct cputopo_power *power; +}; + +/* + * The table will be mostly used by one cpu which will update the + * configuration for all cpu on a cpufreq notification + * or a sched_mc level change + */ +static struct cputopo_scale cpu_power[NR_CPUS]; + +static void set_cpufreq_scale(unsigned int cpuid, unsigned int freq) +{ + unsigned int idx; + + cpu_power[cpuid].freq = freq; + + idx = freq / cpu_power[cpuid].power->step; + if (idx >= cpu_power[cpuid].power->max) + idx = cpu_power[cpuid].power->max - 1; + + per_cpu(cpu_scale, cpuid) = cpu_power[cpuid].power->table[idx]; + smp_wmb(); +} + +static void set_power_scale(unsigned int cpu, unsigned int idx) +{ + cpu_power[cpu].id = idx; + cpu_power[cpu].power = table_config[idx]; + + set_cpufreq_scale(cpu, cpu_power[cpu].freq); +} + +static int topo_cpufreq_transition(struct notifier_block *nb, + unsigned long state, void *data) +{ + struct cpufreq_freqs *freqs = data; + + if (state == CPUFREQ_POSTCHANGE || state == CPUFREQ_RESUMECHANGE) + set_cpufreq_scale(freqs->cpu, freqs->new); + + return NOTIFY_OK; +} + +static struct notifier_block topo_cpufreq_nb = { + .notifier_call = topo_cpufreq_transition, +}; + +static int topo_cpufreq_init(void) +{ + unsigned int cpu; + + /* TODO set initial value according to current freq */ + + /* init core mask */ + for_each_possible_cpu(cpu) { + cpu_power[cpu].freq = 0; + cpu_power[cpu].power = &default_cpu_power; + } + + return cpufreq_register_notifier(&topo_cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); +} +#else +#define ARM_CORTEX_A9_DEFAULT_SCALE 0 +#define ARM_CORTEX_A9_POWER_SCALE 0 +/* This table list all possible cpu power configuration */ +unsigned int table_config[1] = { + 1024, +}; + +static void set_power_scale(unsigned int cpu, unsigned int idx) +{ + per_cpu(cpu_scale, cpu) = table_config[idx]; +} + +static inline int topo_cpufreq_init(void) {return 0; } +#endif + +static int init_cpu_power_scale(void) +{ + /* register cpufreq notifer */ + topo_cpufreq_init(); + + /* Do we need to change default config */ + advanced_topology = 1; + + /* Force a cpu topology update */ + rebuild_sched_domains(); + + return 0; +} + +core_initcall(init_cpu_power_scale); + +/* + * Update the cpu 
power + */ + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +/* + * sched_domain flag configuration + */ +/* TODO add a config flag for this function */ +int arch_sd_sibling_asym_packing(void) +{ + if (sched_smt_power_savings || sched_mc_power_savings) + return SD_ASYM_PACKING; + return 0; +} + +/* + * default topology function + */ + +const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_topology[cpu].core_sibling; } /* + * clear cpu topology masks + */ +static void clear_cpu_topology_mask(void) +{ + unsigned int cpuid; + for_each_possible_cpu(cpuid) { + struct cputopo_arm *cpuid_topo = &(cpu_topology[cpuid]); + cpumask_clear(&cpuid_topo->core_sibling); + cpumask_clear(&cpuid_topo->thread_sibling); + } + smp_wmb(); +} + +/* + * default_cpu_topology_mask set the core and thread mask as described in the + * ARM ARM + */ +static inline void default_cpu_topology_mask(unsigned int cpuid) +{ + struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct cputopo_arm *cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id == cpu_topo->socket_id) { + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, + &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id == cpu_topo->core_id) { + cpumask_set_cpu(cpuid, + &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, + &cpuid_topo->thread_sibling); + } + } + } + smp_wmb(); +} + +static void normal_cpu_topology_mask(void) +{ + unsigned int cpuid; + + for_each_possible_cpu(cpuid) { + default_cpu_topology_mask(cpuid); + set_power_scale(cpuid, ARM_CORTEX_A9_DEFAULT_SCALE); + } + smp_wmb(); +} + +/* + * For Cortex-A9 MPcore, we emulate a multi-package topology in power mode. + * The goal is to gathers tasks on 1 virtual package + */ +static void power_cpu_topology_mask_CA9(void) +{ + unsigned int cpuid, cpu; + + for_each_possible_cpu(cpuid) { + struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; + + for_each_possible_cpu(cpu) { + struct cputopo_arm *cpu_topo = &cpu_topology[cpu]; + + if ((cpuid_topo->socket_id == cpu_topo->socket_id) + && ((cpuid & 0x1) == (cpu & 0x1))) { + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, + &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id == cpu_topo->core_id) { + cpumask_set_cpu(cpuid, + &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, + &cpuid_topo->thread_sibling); + } + } + } + set_power_scale(cpuid, ARM_CORTEX_A9_POWER_SCALE); + } + smp_wmb(); +} + +#define ARM_FAMILY_MASK 0xFF0FFFF0 +#define ARM_CORTEX_A9_FAMILY 0x410FC090 + +/* update_cpu_topology_policy select a cpu topology policy according to the + * available cores. + * TODO: The current version assumes that all cores are exactly the same which + * might not be true. We need to update it to take into account various + * configuration among which system with different kind of core. 
+ */ +static int update_cpu_topology_policy(void) +{ + unsigned long cpuid; + + if (sched_mc_power_savings == POWERSAVINGS_BALANCE_NONE) { + set_cpu_topology_mask = normal_cpu_topology_mask; + return 0; + } + + cpuid = read_cpuid_id(); + cpuid &= ARM_FAMILY_MASK; + + switch (cpuid) { + case ARM_CORTEX_A9_FAMILY: + set_cpu_topology_mask = power_cpu_topology_mask_CA9; + break; + default: + set_cpu_topology_mask = normal_cpu_topology_mask; + break; + } + + return 0; +} + +/* * store_cpu_topology is called at boot when only one cpu is running * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, * which prevents simultaneous write access to cpu_topology array @@ -57,7 +383,6 @@ void store_cpu_topology(unsigned int cpuid) { struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; unsigned int mpidr; - unsigned int cpu; /* If the cpu topology has been already set, just return */ if (cpuid_topo->core_id != -1) @@ -99,26 +424,11 @@ void store_cpu_topology(unsigned int cpuid) cpuid_topo->socket_id = -1; } - /* update core and thread sibling masks */ - for_each_possible_cpu(cpu) { - struct cputopo_arm *cpu_topo = &cpu_topology[cpu]; - - if (cpuid_topo->socket_id == cpu_topo->socket_id) { - cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, - &cpuid_topo->core_sibling); - - if (cpuid_topo->core_id == cpu_topo->core_id) { - cpumask_set_cpu(cpuid, - &cpu_topo->thread_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, - &cpuid_topo->thread_sibling); - } - } - } - smp_wmb(); + /* + * The core and thread sibling masks can also be updated during the + * call of arch_update_cpu_topology + */ + default_cpu_topology_mask(cpuid); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, @@ -127,6 +437,27 @@ void store_cpu_topology(unsigned int cpuid) } /* + * arch_update_cpu_topology is called by the scheduler before building + * a new sched_domain hierarchy. 
+ */ +int arch_update_cpu_topology(void) +{ + if (!advanced_topology) + return 0; + + /* clear core threads mask */ + clear_cpu_topology_mask(); + + /* set topology policy */ + update_cpu_topology_policy(); + + /* set topology mask and power */ + (*set_cpu_topology_mask)(); + + return 1; +} + +/* * init_cpu_topology is called at boot when only one cpu is running * which prevent simultaneous write access to cpu_topology array */ @@ -143,6 +474,116 @@ void init_cpu_topology(void) cpu_topo->socket_id = -1; cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); + + per_cpu(cpu_scale, cpu) = SCHED_POWER_SCALE; } smp_wmb(); } + +/* + * debugfs interface for scaling cpu power + */ + +#ifdef CONFIG_DEBUG_FS +static struct dentry *topo_debugfs_root; + +static ssize_t dbg_write(struct file *file, const char __user *buf, + size_t size, loff_t *off) +{ + unsigned int *value = file->f_dentry->d_inode->i_private; + char cdata[128]; + unsigned long tmp; + unsigned int cpu; + + if (size < (sizeof(cdata)-1)) { + if (copy_from_user(cdata, buf, size)) + return -EFAULT; + cdata[size] = 0; + if (!strict_strtoul(cdata, 10, &tmp)) { + *value = tmp; + +#ifdef CONFIG_CPU_FREQ + for_each_online_cpu(cpu) + set_power_scale(cpu, cpu_power[cpu].id); +#endif + } + return size; + } + return -EINVAL; +} + +static ssize_t dbg_read(struct file *file, char __user *buf, + size_t size, loff_t *off) +{ + unsigned int *value = file->f_dentry->d_inode->i_private; + char cdata[128]; + unsigned int len; + + len = sprintf(cdata, "%u\n", *value); + return simple_read_from_buffer(buf, size, off, cdata, len); +} + +static const struct file_operations debugfs_fops = { + .read = dbg_read, + .write = dbg_write, +}; + +static struct dentry *topo_debugfs_register(unsigned int cpu, + struct dentry *parent) +{ + struct dentry *cpu_d, *d; + char cpu_name[16]; + + sprintf(cpu_name, "cpu%u", cpu); + + cpu_d = debugfs_create_dir(cpu_name, parent); + if (!cpu_d) + return NULL; + + d = debugfs_create_file("cpu_power", S_IRUGO | S_IWUGO, + cpu_d, &per_cpu(cpu_scale, cpu), &debugfs_fops); + if (!d) + goto err_out; + +#ifdef CONFIG_CPU_FREQ + d = debugfs_create_file("scale", S_IRUGO | S_IWUGO, + cpu_d, &cpu_power[cpu].id, &debugfs_fops); + if (!d) + goto err_out; + + d = debugfs_create_file("freq", S_IRUGO, + cpu_d, &cpu_power[cpu].freq, &debugfs_fops); + if (!d) + goto err_out; +#endif + return cpu_d; + +err_out: + debugfs_remove_recursive(cpu_d); + return NULL; +} + +static int __init topo_debugfs_init(void) +{ + struct dentry *d; + unsigned int cpu; + + d = debugfs_create_dir("cpu_topo", NULL); + if (!d) + return -ENOMEM; + topo_debugfs_root = d; + + for_each_possible_cpu(cpu) { + d = topo_debugfs_register(cpu, topo_debugfs_root); + if (d == NULL) + goto err_out; + } + return 0; + +err_out: + debugfs_remove_recursive(topo_debugfs_root); + return -ENOMEM; +} + +late_initcall(topo_debugfs_init); +#endif diff --git a/arch/arm/mach-exynos4/platsmp.c b/arch/arm/mach-exynos4/platsmp.c index ca01370840d..c345b783cde 100644 --- a/arch/arm/mach-exynos4/platsmp.c +++ b/arch/arm/mach-exynos4/platsmp.c @@ -195,12 +195,10 @@ void __init smp_init_cpus(void) ncores = scu_base ? scu_get_core_count(scu_base) : 1; /* sanity check */ - if (ncores > NR_CPUS) { - printk(KERN_WARNING - "EXYNOS4: no. 
of cores (%d) greater than configured " - "maximum of %d - clipping\n", - ncores, NR_CPUS); - ncores = NR_CPUS; + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; } for (i = 0; i < ncores; i++) diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c index 1a1af9e5625..72765952091 100644 --- a/arch/arm/mach-msm/platsmp.c +++ b/arch/arm/mach-msm/platsmp.c @@ -156,6 +156,12 @@ void __init smp_init_cpus(void) { unsigned int i, ncores = get_core_count(); + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; + } + for (i = 0; i < ncores; i++) set_cpu_possible(i, true); diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index ce65e9329c7..889464dc7b2 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -109,12 +109,10 @@ void __init smp_init_cpus(void) ncores = scu_get_core_count(scu_base); /* sanity check */ - if (ncores > NR_CPUS) { - printk(KERN_WARNING - "OMAP4: no. of cores (%d) greater than configured " - "maximum of %d - clipping\n", - ncores, NR_CPUS); - ncores = NR_CPUS; + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; } for (i = 0; i < ncores; i++) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 9ac0225cd51..e40dc47cf2f 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -775,7 +775,6 @@ static struct gpio cm_x300_wi2wi_gpios[] __initdata = { static void __init cm_x300_init_wi2wi(void) { - int bt_reset, wlan_en; int err; if (system_rev < 130) { @@ -791,12 +790,11 @@ static void __init cm_x300_init_wi2wi(void) } udelay(10); - gpio_set_value(bt_reset, 0); + gpio_set_value(cm_x300_wi2wi_gpios[1].gpio, 0); udelay(10); - gpio_set_value(bt_reset, 1); + gpio_set_value(cm_x300_wi2wi_gpios[1].gpio, 1); - gpio_free(wlan_en); - gpio_free(bt_reset); + gpio_free_array(ARRAY_AND_SIZE(cm_x300_wi2wi_gpios)); } /* MFP */ diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index 4ae943bafa9..e83c654a58d 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -52,12 +52,10 @@ void __init smp_init_cpus(void) ncores = scu_base ? scu_get_core_count(scu_base) : 1; /* sanity check */ - if (ncores > NR_CPUS) { - printk(KERN_WARNING - "Realview: no. 
of cores (%d) greater than configured " - "maximum of %d - clipping\n", - ncores, NR_CPUS); - ncores = NR_CPUS; + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; } for (i = 0; i < ncores; i++) diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c index 66f980625a3..e4e485fa253 100644 --- a/arch/arm/mach-shmobile/platsmp.c +++ b/arch/arm/mach-shmobile/platsmp.c @@ -56,6 +56,12 @@ void __init smp_init_cpus(void) unsigned int ncores = shmobile_smp_get_core_count(); unsigned int i; + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; + } + for (i = 0; i < ncores; i++) set_cpu_possible(i, true); diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c index 0886cbccdde..7d2b5d03c1d 100644 --- a/arch/arm/mach-tegra/platsmp.c +++ b/arch/arm/mach-tegra/platsmp.c @@ -114,10 +114,10 @@ void __init smp_init_cpus(void) { unsigned int i, ncores = scu_get_core_count(scu_base); - if (ncores > NR_CPUS) { - printk(KERN_ERR "Tegra: no. of cores (%u) greater than configured (%u), clipping\n", - ncores, NR_CPUS); - ncores = NR_CPUS; + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; } for (i = 0; i < ncores; i++) diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 1da23bb87c1..8aa104a4711 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -99,7 +99,27 @@ static void ux500_l2x0_inv_all(void) ux500_cache_sync(); } -static int ux500_l2x0_init(void) +static int __init ux500_l2x0_unlock(void) +{ + int i; + + /* + * Unlock Data and Instruction Lock if locked. Ux500 U-Boot versions + * apparently locks both caches before jumping to the kernel. The + * l2x0 core will not touch the unlock registers if the l2x0 is + * already enabled, so we do it right here instead. The PL310 has + * 8 sets of registers, one per possible CPU. + */ + for (i = 0; i < 8; i++) { + writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } + return 0; +} + +static int __init ux500_l2x0_init(void) { if (cpu_is_u5500()) l2x0_base = __io_address(U5500_L2CC_BASE); @@ -108,6 +128,9 @@ static int ux500_l2x0_init(void) else ux500_unknown_soc(); + /* Unlock before init */ + ux500_l2x0_unlock(); + /* 64KB way size, 8 way associativity, force WA */ l2x0_init(l2x0_base, 0x3e060000, 0xc0000fff); diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index a33df5f4c27..eb5199102cf 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -156,12 +156,10 @@ void __init smp_init_cpus(void) ncores = scu_base ? scu_get_core_count(scu_base) : 1; /* sanity check */ - if (ncores > NR_CPUS) { - printk(KERN_WARNING - "U8500: no. 
of cores (%d) greater than configured " - "maximum of %d - clipping\n", - ncores, NR_CPUS); - ncores = NR_CPUS; + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; } for (i = 0; i < ncores; i++) diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index bfd32f52c2d..2b1e836a76e 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -221,6 +221,12 @@ static void ct_ca9x4_init_cpu_map(void) { int i, ncores = scu_get_core_count(MMIO_P2V(A9_MPCORE_SCU)); + if (ncores > nr_cpu_ids) { + pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", + ncores, nr_cpu_ids); + ncores = nr_cpu_ids; + } + for (i = 0; i < ncores; ++i) set_cpu_possible(i, true); diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 74be05f3e03..c2809d7990b 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -9,13 +9,51 @@ #include <linux/io.h> #include <linux/personality.h> #include <linux/random.h> -#include <asm/cputype.h> -#include <asm/system.h> +#include <asm/cachetype.h> + +static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = addr & ~(SHMLBA-1); + unsigned long off = (pgoff << PAGE_SHIFT) & (SHMLBA-1); + + if (base + off <= addr) + return base + off; + + return base - off; +} #define COLOUR_ALIGN(addr,pgoff) \ ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) +/* gap between mmap and stack */ +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) + +static int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - rnd); +} + /* * We need to ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. We need to ensure that @@ -32,25 +70,15 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long start_addr; -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) - unsigned int cache_type; - int do_align = 0, aliasing = 0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing() || icache_is_vipt_aliasing(); /* * We only need to do colour alignment if either the I or D - * caches alias. This is indicated by bits 9 and 21 of the - * cache type register. + * caches alias. */ - cache_type = read_cpuid_cachetype(); - if (cache_type != read_cpuid_id()) { - aliasing = (cache_type | cache_type >> 12) & (1 << 11); - if (aliasing) - do_align = filp || flags & MAP_SHARED; - } -#else -#define do_align 0 -#define aliasing 0 -#endif + if (aliasing) + do_align = filp || (flags & MAP_SHARED); /* * We enforce the MAP_FIXED case. 
@@ -79,13 +107,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (len > mm->cached_hole_size) { start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } - /* 8 bits of randomness in 20 address space bits */ - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) - addr += (get_random_int() % (1 << 8)) << PAGE_SHIFT; full_search: if (do_align) @@ -122,6 +146,134 @@ full_search: } } +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing() || icache_is_vipt_aliasing(); + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + /* requesting a specific address */ + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + if (do_align) { + unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff); + addr = base + len; + } + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base - len; + if (do_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || addr+len <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start - len; + if (do_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + } while (len < vma->vm_start); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + /* 8 bits of randomness in 20 address space bits */ + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) + random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} /* * You really shouldn't be using read() or write() on /dev/mem. This diff --git a/arch/arm/plat-mxc/include/mach/iomux-v3.h b/arch/arm/plat-mxc/include/mach/iomux-v3.h index ebbce33097a..45099566fec 100644 --- a/arch/arm/plat-mxc/include/mach/iomux-v3.h +++ b/arch/arm/plat-mxc/include/mach/iomux-v3.h @@ -89,11 +89,11 @@ typedef u64 iomux_v3_cfg_t; #define PAD_CTL_HYS (1 << 8) #define PAD_CTL_PKE (1 << 7) -#define PAD_CTL_PUE (1 << 6) -#define PAD_CTL_PUS_100K_DOWN (0 << 4) -#define PAD_CTL_PUS_47K_UP (1 << 4) -#define PAD_CTL_PUS_100K_UP (2 << 4) -#define PAD_CTL_PUS_22K_UP (3 << 4) +#define PAD_CTL_PUE (1 << 6 | PAD_CTL_PKE) +#define PAD_CTL_PUS_100K_DOWN (0 << 4 | PAD_CTL_PUE) +#define PAD_CTL_PUS_47K_UP (1 << 4 | PAD_CTL_PUE) +#define PAD_CTL_PUS_100K_UP (2 << 4 | PAD_CTL_PUE) +#define PAD_CTL_PUS_22K_UP (3 << 4 | PAD_CTL_PUE) #define PAD_CTL_ODE (1 << 3) |