author     Chris Redpath <chris.redpath@arm.com>   2015-02-03 16:23:22 +0900
committer  Seung-Woo Kim <sw0312.kim@samsung.com>  2016-12-14 13:41:43 +0900
commit     c7ae8e3810adfb550179f0d269539f355afb3705 (patch)
tree       2d97a069c285435c6d4292e09a47010ce61f426f /kernel
parent     7a91f6f298353c007ce509f83ab742a80f56a180 (diff)
HMP: Modify the runqueue stats to add a new child stat
The original intent here was to track unweighted runqueue load
with less resolution so we could use the least-recently-disturbed
runqueue to choose between 'closely related' load levels.
However, after experimenting with the resolution, it turned out
that the following algorithm is highly beneficial for mobile
workloads.
In hmp_domain_min_load:
* If any CPU's load is zero, the overall load is zero
* If no CPUs are idle, the domain is 'fully loaded'
Additionally, the time since the last migration is used to
discriminate between idle CPUs.
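The selection logic those rules describe can be shown as a minimal C sketch. This is illustrative only, not the kernel code: `NCPUS`, `load[]`, and `last_migration[]` are hypothetical stand-ins for the domain's CPU mask, the per-CPU `load_avg_ratio`, and the `hmp_last_up_migration`/`hmp_last_down_migration` timestamps, and it assumes `scale_load_down(1024) == 1024` (i.e. SCHED_LOAD_RESOLUTION is 0, as in kernels of this generation).

```c
/*
 * Illustrative sketch only -- not the kernel implementation.
 * load[] models each CPU's load_avg_ratio (0..1023); last_migration[]
 * models the hmp_last_up/down_migration timestamps.
 */
#include <stdint.h>
#include <limits.h>

#define NCPUS 4 /* hypothetical domain size */

static unsigned int domain_min_load(const unsigned int load[NCPUS],
                                    const uint64_t last_migration[NCPUS],
                                    int *min_cpu)
{
    unsigned long contrib, min_contrib = ULONG_MAX;
    uint64_t oldest = UINT64_MAX;
    int cpu;

    for (cpu = 0; cpu < NCPUS; cpu++) {
        contrib = load[cpu] * 1024UL; /* divisor applied at the end */

        /*
         * Lower load wins; loads falling into the same >>13 bucket
         * are 'closely related', so among those the CPU whose last
         * migration is oldest (least-recently-disturbed) wins.
         */
        if (contrib < min_contrib ||
            ((contrib >> 13) == (min_contrib >> 13) &&
             last_migration[cpu] < oldest)) {
            min_contrib = contrib;
            oldest = last_migration[cpu];
            *min_cpu = cpu;
        }
    }

    /*
     * An idle CPU (ratio 0) gives contrib 0, so the domain reports
     * zero load; any busy minimum (ratio >= 1 -> contrib >= 1024)
     * clamps to 1023, i.e. 'fully loaded' -- the two rules above.
     */
    return (unsigned int)(min_contrib > 1023 ? 1023 : min_contrib);
}
```

Note how the final clamp makes the returned value effectively binary: the `>> 13` bucketing only influences which CPU is picked inside the loop, not the load value reported for the domain.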
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
[k.kozlowski: rebased on 4.1, no signed-off-by of previous committer]
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/core.c |  2
-rw-r--r--  kernel/sched/fair.c | 27
2 files changed, 17 insertions, 12 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2ef8640a66d7..7a00317cbd56 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1838,7 +1838,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.avg.load_avg_ratio = 1023;
 	p->se.avg.load_avg_contrib =
 			(1023 * scale_load_down(p->se.load.weight));
-	p->se.avg.runnable_avg_period = LOAD_AVG_MAX;
+	p->se.avg.avg_period = LOAD_AVG_MAX;
 	p->se.avg.runnable_avg_sum = LOAD_AVG_MAX;
 	p->se.avg.usage_avg_sum = LOAD_AVG_MAX;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9d8e20708bbf..041d45df7bfb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2869,7 +2869,6 @@ static inline void __update_group_entity_contrib(struct sched_entity *se)
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
-	u32 contrib;
 	int cpu = -1;	/* not used in normal case */
 
 #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -2878,9 +2877,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 	__update_entity_runnable_avg(rq_clock_task(rq), cpu, &rq->avg, runnable,
 			runnable);
 	__update_tg_runnable_avg(&rq->avg, &rq->cfs);
-	contrib = rq->avg.runnable_avg_sum * scale_load_down(1024);
-	contrib /= (rq->avg.avg_period + 1);
-	trace_sched_rq_runnable_ratio(cpu_of(rq), scale_load(contrib));
+	trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio);
 	trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg);
 }
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -2908,9 +2905,10 @@ static inline void __update_task_entity_contrib(struct sched_entity *se)
 }
 
 /* Compute the current contribution to load_avg by se, return any delta */
-static long __update_entity_load_avg_contrib(struct sched_entity *se)
+static long __update_entity_load_avg_contrib(struct sched_entity *se, long *ratio)
 {
 	long old_contrib = se->avg.load_avg_contrib;
+	long old_ratio = se->avg.load_avg_ratio;
 
 	if (entity_is_task(se)) {
 		__update_task_entity_contrib(se);
@@ -2919,6 +2917,8 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
 		__update_group_entity_contrib(se);
 	}
 
+	if (ratio)
+		*ratio = se->avg.load_avg_ratio - old_ratio;
 	return se->avg.load_avg_contrib - old_contrib;
 }
 
@@ -2962,7 +2962,7 @@ static inline void update_entity_load_avg(struct sched_entity *se,
 					  int update_cfs_rq)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	long contrib_delta, utilization_delta;
+	long contrib_delta, utilization_delta, ratio_delta;
 	u64 now;
 	int cpu = -1;	/* not used in normal case */
 
@@ -2982,7 +2982,7 @@ static inline void update_entity_load_avg(struct sched_entity *se,
 			cfs_rq->curr == se))
 		return;
 
-	contrib_delta = __update_entity_load_avg_contrib(se);
+	contrib_delta = __update_entity_load_avg_contrib(se, &ratio_delta);
 	utilization_delta = __update_entity_utilization_avg_contrib(se);
 
 	if (!update_cfs_rq)
@@ -2991,6 +2991,7 @@ static inline void update_entity_load_avg(struct sched_entity *se,
 	if (se->on_rq) {
 		cfs_rq->runnable_load_avg += contrib_delta;
 		cfs_rq->utilization_load_avg += utilization_delta;
+		rq_of(cfs_rq)->avg.load_avg_ratio += ratio_delta;
 	} else {
 		subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
 	}
@@ -3069,6 +3070,7 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 
 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
 	cfs_rq->utilization_load_avg += se->avg.utilization_avg_contrib;
+
 	/* we force update consideration on load-balancer moves */
 	update_cfs_rq_blocked_load(cfs_rq, !wakeup);
 }
@@ -3088,6 +3090,7 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
 
 	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
 	cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib;
+
 	if (sleep) {
 		cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
 		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
@@ -5459,15 +5462,15 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
 	unsigned long contrib, scaled_contrib;
 	struct sched_avg *avg;
 
-	for_each_cpu_mask(cpu, hmpd->cpus) {
+	for_each_cpu(cpu, &hmpd->cpus) {
 		avg = &cpu_rq(cpu)->avg;
 		/* used for both up and down migration */
 		curr_last_migration = avg->hmp_last_up_migration ?
 			avg->hmp_last_up_migration : avg->hmp_last_down_migration;
 
 		/* don't use the divisor in the loop, just at the end */
-		contrib = avg->runnable_avg_sum * scale_load_down(1024);
-		scaled_contrib = contrib >> 22;
+		contrib = avg->load_avg_ratio * scale_load_down(1024);
+		scaled_contrib = contrib >> 13;
 
 		if ((contrib < min_runnable_load) ||
 			(scaled_contrib == scaled_min_runnable_load &&
@@ -5490,7 +5493,9 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
 		*min_cpu = min_cpu_runnable_temp;
 
 	/* domain will often have at least one empty CPU */
-	return min_runnable_load ? min_runnable_load / (LOAD_AVG_MAX + 1) : 0;
+	trace_printk("hmp_domain_min_load returning %lu\n",
+		min_runnable_load > 1023 ? 1023 : min_runnable_load);
+	return min_runnable_load > 1023 ? 1023 : min_runnable_load;
 }
 
 /*
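A quick check on the arithmetic of the new scaling in `hmp_domain_min_load()`, again assuming `scale_load_down(1024) == 1024` (SCHED_LOAD_RESOLUTION of 0): `contrib = load_avg_ratio * 1024` spans 0 to roughly 2^20, so `contrib >> 13` equals `load_avg_ratio / 8`, meaning CPUs are compared in buckets of 8 ratio units, and the final clamp reports any non-zero minimum as 1023. A standalone userspace illustration with hypothetical values:

```c
#include <stdio.h>

int main(void)
{
    /* assumes scale_load_down(1024) == 1024 */
    for (unsigned int ratio = 0; ratio <= 1023; ratio += 128) {
        unsigned long contrib = ratio * 1024UL; /* as in the loop */
        unsigned long bucket  = contrib >> 13;  /* == ratio / 8   */
        unsigned long ret     = contrib > 1023 ? 1023 : contrib;

        printf("ratio %4u -> bucket %3lu -> reported load %4lu\n",
               ratio, bucket, ret);
    }
    /*
     * Only ratio 0 reports 0; every busy ratio reports 1023, which
     * is exactly the 'zero or fully loaded' rule from the commit
     * message. The buckets only matter for the migration-age
     * tie-break inside the loop.
     */
    return 0;
}
```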