| | | |
|---|---|---|
| author | Chris Redpath <chris.redpath@arm.com> | 2015-02-03 21:45:54 +0900 |
| committer | Seung-Woo Kim <sw0312.kim@samsung.com> | 2016-12-14 13:41:50 +0900 |
| commit | a9f7ae0d308fbb55ed5c65733c1044545af15e05 (patch) | |
| tree | 67cc3d2b9c1da5fcf7681ed723d6bac0ad065459 /kernel | |
| parent | 8209016c92325719af3aab2b8e7de3c568ecdfb9 (diff) | |
HMP: Implement task packing for small tasks in HMP systems
If we wake up a task on a little CPU, fill CPUs rather than spreading
the load. Adds two new files to /sys/kernel/hmp to control packing
behaviour.

packing_enable: task packing enabled (1) or disabled (0)
packing_limit: runqueues will be filled up to this load ratio

This functionality is disabled by default on TC2 as it lacks per-cpu
power gating, so packing small tasks there doesn't make sense.
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
[k.kozlowski: rebased on 4.1, no signed-off-by of previous committer]
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/fair.c | 143 |
1 file changed, 134 insertions, 9 deletions
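The two controls land under /sys/kernel/hmp once the attribute group is registered against kernel_kobj. As a rough illustration only (not part of the patch), a small userspace helper could toggle packing like this; the file names come from the hmp_attr_add() calls in the diff below, and the values written are just examples:

```c
#include <stdio.h>

/*
 * Minimal sketch: write an integer to one of the sysfs attributes added
 * by this patch. The paths only exist when CONFIG_SCHED_HMP_LITTLE_PACKING
 * is enabled; the values used here are illustrative.
 */
static int write_sysfs_int(const char *path, int value)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", value);
	return fclose(f);
}

int main(void)
{
	/* enable packing and, as an example, use the TC2-style limit of 650 */
	if (write_sysfs_int("/sys/kernel/hmp/packing_enable", 1) ||
	    write_sysfs_int("/sys/kernel/hmp/packing_limit", 650))
		perror("hmp packing sysfs");
	return 0;
}
```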
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 10280438fcb1..fa47df3f2eda 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2515,11 +2515,7 @@ struct hmp_global_attr {
 	int (*from_sysfs)(int);
 };
 
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
 
 struct hmp_data_struct {
 #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -5163,6 +5159,13 @@ static struct sched_entity *hmp_get_lightest_task(
  * hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
  * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
  * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanic. This behavior is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
  */
 unsigned int hmp_up_threshold = 700;
 unsigned int hmp_down_threshold = 512;
@@ -5172,11 +5175,27 @@ unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
 unsigned int hmp_next_up_threshold = 4096;
 unsigned int hmp_next_down_threshold = 4096;
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve @ around 800Mhz, so
+   we aim to spread the load around that frequency. */
+unsigned int hmp_full_threshold = 650;	/*  80% of the 800Mhz freq * NICE_0_LOAD */
+#endif
+#endif
+
 static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
 static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
 static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
 						int *min_cpu, struct cpumask *affinity);
 
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+	return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
+
 /* Check if cpu is in fastest hmp_domain */
 static inline unsigned int hmp_cpu_is_fastest(int cpu)
 {
@@ -5256,6 +5275,49 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
 	return lowest_cpu;
 }
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPU in
+ * logical CPU order, and selects the first which will
+ * have at least 10% capacity available, according to
+ * both tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+		int cpu) {
+	int tmp_cpu;
+	unsigned long estimated_load;
+	struct hmp_domain *hmp;
+	struct sched_avg *avg;
+	struct cpumask allowed_hmp_cpus;
+
+	if(!hmp_packing_enabled ||
+			tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+		return hmp_select_slower_cpu(tsk, cpu);
+
+	if (hmp_cpu_is_slowest(cpu))
+		hmp = hmp_cpu_domain(cpu);
+	else
+		hmp = hmp_slower_domain(cpu);
+
+	/* respect affinity */
+	cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+		tsk_cpus_allowed(tsk));
+
+	for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+		avg = &cpu_rq(tmp_cpu)->avg;
+		/* estimate new rq load if we add this task */
+		estimated_load = avg->load_avg_ratio +
+				tsk->se.avg.load_avg_ratio;
+		if (estimated_load <= hmp_full_threshold) {
+			cpu = tmp_cpu;
+			break;
+		}
+	}
+	/* if no match was found, the task uses the initial value */
+	return cpu;
+}
+#endif
 static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
 {
 	/* hack - always use clock from first online CPU */
@@ -5380,6 +5442,15 @@ static int hmp_freqinvar_from_sysfs(int value)
 	return value;
 }
 #endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+	if (value < 0)
+		return -1;
+	return value;
+}
+#endif
 static void hmp_attr_add(
 	const char *name,
 	int *value,
@@ -5433,6 +5504,16 @@ static int hmp_attr_init(void)
 		NULL,
 		hmp_freqinvar_from_sysfs);
 #endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+	hmp_attr_add("packing_enable",
+		&hmp_packing_enabled,
+		NULL,
+		hmp_freqinvar_from_sysfs);
+	hmp_attr_add("packing_limit",
+		&hmp_full_threshold,
+		NULL,
+		hmp_packing_from_sysfs);
+#endif
 	hmp_data.attr_group.name = "hmp";
 	hmp_data.attr_group.attrs = hmp_data.attributes;
 	ret = sysfs_create_group(kernel_kobj,
@@ -5681,10 +5762,17 @@ unlock:
 		return new_cpu;
 	}
 	if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+		new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
 		new_cpu = hmp_select_slower_cpu(p, prev_cpu);
-		hmp_next_down_delay(&p->se, new_cpu);
-		trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
-		return new_cpu;
+#endif
+		if (new_cpu != prev_cpu) {
+			hmp_next_down_delay(&p->se, new_cpu);
+			trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+			return new_cpu;
+		}
 	}
 	/* Make sure that the task stays in its previous hmp domain */
 	if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -8189,19 +8277,50 @@ static struct {
 	unsigned long next_balance;	/* in jiffy units */
 } nohz ____cacheline_aligned;
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+	struct hmp_domain *hmp;
+	/* always allow ilb on non-slowest domain */
+	if (!hmp_cpu_is_slowest(cpu))
+		return 1;
+
+	hmp = hmp_cpu_domain(cpu);
+	for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+		/* only idle balance if a CPU is loaded over threshold */
+		if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+			return 1;
+	}
+	return 0;
+}
+#endif
+
 static inline int find_new_ilb(void)
 {
	int ilb = cpumask_first(nohz.idle_cpus_mask);
 #ifdef CONFIG_SCHED_HMP
 	int call_cpu = smp_processor_id();
+	int ilb_needed = 1;
+
 	/* restrict nohz balancing to occur in the same hmp domain */
 	ilb = cpumask_first_and(nohz.idle_cpus_mask,
 			&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+	if (ilb < nr_cpu_ids)
+		ilb_needed = hmp_packing_ilb_needed(ilb);
 #endif
+	if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+		return ilb;
+#else
 	if (ilb < nr_cpu_ids && idle_cpu(ilb))
 		return ilb;
+#endif
 
 	return nr_cpu_ids;
 }
@@ -8612,8 +8731,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
 	struct task_struct *p = task_of(se);
 	u64 now;
 
-	if (hmp_cpu_is_slowest(cpu))
+	if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+		if(hmp_packing_enabled)
+			return 1;
+		else
+#endif
 		return 0;
+	}
 
 #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
 	/* Filter by task priority */
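For readers who want the packing heuristic in isolation, the sketch below is a simplified userspace model of the decision made in hmp_best_little_cpu(), not kernel code. It assumes a plain array of per-CPU load_avg_ratio values and the default hmp_full_threshold of (NICE_0_LOAD * 9) / 8; the kernel version additionally respects task affinity and falls back to hmp_select_slower_cpu().

```c
#include <stdio.h>

#define NICE_0_LOAD	1024
#define FULL_THRESHOLD	((NICE_0_LOAD * 9) / 8)	/* default hmp_full_threshold */

/*
 * Simplified model of the packing decision: scan little CPUs in logical
 * order and pick the first whose tracked runqueue load plus the waking
 * task's load stays within the threshold. Returns the fallback CPU when
 * every candidate would become "full".
 */
static int pick_little_cpu(const unsigned long *rq_load, int nr_little,
			   unsigned long task_load, int fallback_cpu)
{
	int cpu;

	/* a task heavier than ~90% of NICE_0_LOAD is not worth packing */
	if (task_load > (NICE_0_LOAD * 90) / 100)
		return fallback_cpu;

	for (cpu = 0; cpu < nr_little; cpu++) {
		if (rq_load[cpu] + task_load <= FULL_THRESHOLD)
			return cpu;
	}
	return fallback_cpu;
}

int main(void)
{
	/* hypothetical per-CPU load_avg_ratio values for four little CPUs */
	unsigned long rq_load[] = { 900, 400, 100, 0 };

	/* a 300-unit task skips CPU0 (would exceed 1152) and packs onto CPU1 */
	printf("chosen cpu: %d\n", pick_little_cpu(rq_load, 4, 300, 3));
	return 0;
}
```

Running the model prints `chosen cpu: 1`: CPU0 would be pushed past the threshold, so the task packs onto the next little CPU in logical order instead of spreading to an idle one.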