From c8f44affb7244f2ac3e703cab13d55ede27621bb Mon Sep 17 00:00:00 2001
From: Michał Mirosław
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: introduce and use netdev_features_t for device features sets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: add a couple of missing conversions in drivers
    split unexporting netdev_fix_features()
    implemented %pNF
    convert sock::sk_route_(no?)caps

Signed-off-by: Michał Mirosław
Signed-off-by: David S. Miller
---
 lib/vsprintf.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 993599e66e5..8e75003d62f 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -777,6 +777,18 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
+static
+char *netdev_feature_string(char *buf, char *end, const u8 *addr,
+			    struct printf_spec spec)
+{
+	spec.flags |= SPECIAL | SMALL | ZEROPAD;
+	if (spec.field_width == -1)
+		spec.field_width = 2 + 2 * sizeof(netdev_features_t);
+	spec.base = 16;
+
+	return number(buf, end, *(const netdev_features_t *)addr, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -824,6 +836,7 @@ int kptr_restrict __read_mostly;
  *       Do not use this feature without some mechanism to verify the
  *       correctness of the format string and va_list arguments.
  * - 'K' For a kernel pointer that should be hidden from unprivileged users
+ * - 'NF' For a netdev_features_t
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
@@ -896,6 +909,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 				       has_capability_noaudit(current, CAP_SYSLOG))))
 			ptr = NULL;
 		break;
+	case 'N':
+		switch (fmt[1]) {
+		case 'F':
+			return netdev_feature_string(buf, end, ptr, spec);
+		}
+		break;
 	}
 	spec.flags |= SMALL;
 	if (spec.field_width == -1) {
--
cgit v1.2.3
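A minimal usage sketch of the new %pNF specifier added above (an editorial
illustration, not part of the patch; it assumes a struct net_device *dev whose
features field is a netdev_features_t, and note that the specifier is handed a
pointer to the value, not the value itself):

	/* Hypothetical example: log a device's feature mask as zero-padded hex. */
	printk(KERN_DEBUG "%s: features = %pNF\n", dev->name, &dev->features);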
From 75957ba36c05b979701e9ec64b37819adc12f830 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Mon, 28 Nov 2011 16:32:35 +0000
Subject: dql: Dynamic queue limits

Implementation of dynamic queue limits (dql). This is a library which
allows a queue limit to be dynamically managed. The goal of dql is to
set the queue limit (the number of objects that may be queued) as low as
possible without allowing the queue to be starved.

dql would be used with a queue which has these properties:

1) Objects are queued up to some limit which can be expressed as a
   count of objects.
2) Periodically a completion process executes which retires consumed
   objects.
3) Starvation occurs when limit has been reached, all queued data has
   actually been consumed, but completion processing has not yet run,
   so queuing new data is blocked.
4) Minimizing the amount of queued data is desirable.

A canonical example of such a queue would be a NIC HW transmit queue.

The queue limit is dynamic: it will increase or decrease over time
depending on the workload. The queue limit is recalculated each time
completion processing is done. Increases occur when the queue is starved
and can exponentially increase over successive intervals. Decreases
occur when more data is being maintained in the queue than needed to
prevent starvation. The number of extra objects, or "slack", is measured
over successive intervals, and to avoid hysteresis the limit is only
reduced by the minimum slack seen over a configurable time period.

The dql API provides routines to manage the queue:
- dql_init is called to initialize the dql structure
- dql_reset is called to reset dynamic values
- dql_queued called when objects are being enqueued
- dql_avail returns availability in the queue
- dql_completed is called when objects have been consumed in the queue

Configuration consists of:
- max_limit, maximum limit
- min_limit, minimum limit
- slack_hold_time, time to measure instances of slack before reducing
  queue limit
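A usage sketch of the routines just listed, in the shape of a hypothetical
driver transmit path (editorial illustration, not from the patch: struct
my_ring, my_xmit and my_tx_clean are invented names, and the actual queue
stop/wake plumbing and locking are elided):

	#include <linux/errno.h>
	#include <linux/dynamic_queue_limits.h>

	struct my_ring {			/* hypothetical driver state */
		struct dql dql;
		/* ... descriptor ring, producer/consumer indices ... */
	};

	/* Setup would call dql_init(&ring->dql, HZ): hold slack samples
	 * for one second before reducing the limit. */

	/* Enqueue path; must be serialized by the caller. */
	static int my_xmit(struct my_ring *ring, unsigned int objs)
	{
		if (dql_avail(&ring->dql) < 0)
			return -EBUSY;		/* at the dynamic limit: stop queue */

		dql_queued(&ring->dql, objs);	/* record what was posted to HW */
		/* ... write descriptors, ring the doorbell ... */
		return 0;
	}

	/* Completion path; retires objects and lets dql recompute the limit. */
	static void my_tx_clean(struct my_ring *ring, unsigned int done)
	{
		dql_completed(&ring->dql, done);
		if (dql_avail(&ring->dql) >= 0) {
			/* ... wake the queue if it was stopped ... */
		}
	}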
Signed-off-by: Tom Herbert
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/dynamic_queue_limits.h |  97 +++++++++++++++++++++++++
 lib/Kconfig                          |   3 +
 lib/Makefile                         |   2 +
 lib/dynamic_queue_limits.c           | 133 +++++++++++++++++++++++++++++++++++
 4 files changed, 235 insertions(+)
 create mode 100644 include/linux/dynamic_queue_limits.h
 create mode 100644 lib/dynamic_queue_limits.c

(limited to 'lib')

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
new file mode 100644
index 00000000000..5621547d631
--- /dev/null
+++ b/include/linux/dynamic_queue_limits.h
@@ -0,0 +1,97 @@
+/*
+ * Dynamic queue limits (dql) - Definitions
+ *
+ * Copyright (c) 2011, Tom Herbert
+ *
+ * This header file contains the definitions for dynamic queue limits (dql).
+ * dql would be used in conjunction with a producer/consumer type queue
+ * (possibly a HW queue). Such a queue would have these general properties:
+ *
+ *   1) Objects are queued up to some limit specified as number of objects.
+ *   2) Periodically a completion process executes which retires consumed
+ *      objects.
+ *   3) Starvation occurs when limit has been reached, all queued data has
+ *      actually been consumed, but completion processing has not yet run
+ *      so queuing new data is blocked.
+ *   4) Minimizing the amount of queued data is desirable.
+ *
+ * The goal of dql is to calculate the limit as the minimum number of objects
+ * needed to prevent starvation.
+ *
+ * The primary functions of dql are:
+ *    dql_queued - called when objects are enqueued to record number of objects
+ *    dql_avail - returns how many objects are available to be queued based
+ *      on the object limit and how many objects are already enqueued
+ *    dql_completed - called at completion time to indicate how many objects
+ *      were retired from the queue
+ *
+ * The dql implementation does not implement any locking for the dql data
+ * structures, the higher layer should provide this.  dql_queued should
+ * be serialized to prevent concurrent execution of the function; this
+ * is also true for dql_completed.  However, dql_queued and dql_completed can
+ * be executed concurrently (i.e. they can be protected by different locks).
+ */
+
+#ifndef _LINUX_DQL_H
+#define _LINUX_DQL_H
+
+#ifdef __KERNEL__
+
+struct dql {
+	/* Fields accessed in enqueue path (dql_queued) */
+	unsigned int	num_queued;		/* Total ever queued */
+	unsigned int	adj_limit;		/* limit + num_completed */
+	unsigned int	last_obj_cnt;		/* Count at last queuing */
+
+	/* Fields accessed only by completion path (dql_completed) */
+
+	unsigned int	limit ____cacheline_aligned_in_smp; /* Current limit */
+	unsigned int	num_completed;		/* Total ever completed */
+
+	unsigned int	prev_ovlimit;		/* Previous over limit */
+	unsigned int	prev_num_queued;	/* Previous queue total */
+	unsigned int	prev_last_obj_cnt;	/* Previous queuing cnt */
+
+	unsigned int	lowest_slack;		/* Lowest slack found */
+	unsigned long	slack_start_time;	/* Time slacks seen */
+
+	/* Configuration */
+	unsigned int	max_limit;		/* Max limit */
+	unsigned int	min_limit;		/* Minimum limit */
+	unsigned int	slack_hold_time;	/* Time to measure slack */
+};
+
+/* Set some static maximums */
+#define DQL_MAX_OBJECT		(UINT_MAX / 16)
+#define DQL_MAX_LIMIT		((UINT_MAX / 2) - DQL_MAX_OBJECT)
+
+/*
+ * Record number of objects queued. Assumes that caller has already checked
+ * availability in the queue with dql_avail.
+ */
+static inline void dql_queued(struct dql *dql, unsigned int count)
+{
+	BUG_ON(count > DQL_MAX_OBJECT);
+
+	dql->num_queued += count;
+	dql->last_obj_cnt = count;
+}
+
+/* Returns how many objects can be queued, < 0 indicates over limit. */
+static inline int dql_avail(const struct dql *dql)
+{
+	return dql->adj_limit - dql->num_queued;
+}
+
+/* Record number of completed objects and recalculate the limit. */
+void dql_completed(struct dql *dql, unsigned int count);
+
+/* Reset dql state */
+void dql_reset(struct dql *dql);
+
+/* Initialize dql state */
+int dql_init(struct dql *dql, unsigned hold_time);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DQL_H */
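An editorial note on the adj_limit field above: it caches limit + num_completed
so that the enqueue-path availability check is a single subtraction. Worked
with assumed numbers: limit = 100 and num_completed = 70 give adj_limit = 170;
if num_queued = 150, dql_avail() returns 170 - 150 = 20, meaning 80 objects are
in flight and 20 more may be queued before the limit is reached. A negative
return means the queue is over its limit.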
diff --git a/lib/Kconfig b/lib/Kconfig
index 32f3e5ae2be..63b5782732e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -244,6 +244,9 @@ config CPU_RMAP
 	bool
 	depends on SMP
 
+config DQL
+	bool
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index a4da283f5dc..ff00d4dcb7e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
 obj-$(CONFIG_CORDIC) += cordic.o
 
+obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
new file mode 100644
index 00000000000..3d1bdcdd7db
--- /dev/null
+++ b/lib/dynamic_queue_limits.c
@@ -0,0 +1,133 @@
+/*
+ * Dynamic byte queue limits.  See include/linux/dynamic_queue_limits.h
+ *
+ * Copyright (c) 2011, Tom Herbert
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/dynamic_queue_limits.h>
+
+#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
+
+/* Records completed count and recalculates the queue limit */
+void dql_completed(struct dql *dql, unsigned int count)
+{
+	unsigned int inprogress, prev_inprogress, limit;
+	unsigned int ovlimit, all_prev_completed, completed;
+
+	/* Can't complete more than what's in queue */
+	BUG_ON(count > dql->num_queued - dql->num_completed);
+
+	completed = dql->num_completed + count;
+	limit = dql->limit;
+	ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
+	inprogress = dql->num_queued - completed;
+	prev_inprogress = dql->prev_num_queued - dql->num_completed;
+	all_prev_completed = POSDIFF(completed, dql->prev_num_queued);
+
+	if ((ovlimit && !inprogress) ||
+	    (dql->prev_ovlimit && all_prev_completed)) {
+		/*
+		 * Queue considered starved if:
+		 *   - The queue was over-limit in the last interval,
+		 *     and there is no more data in the queue.
+		 *  OR
+		 *   - The queue was over-limit in the previous interval and
+		 *     when enqueuing it was possible that all queued data
+		 *     had been consumed.  This covers the case when the queue
+		 *     may have become starved between completion processing
+		 *     running and the next time enqueue was scheduled.
+		 *
+		 *     When the queue is starved, increase the limit by the
+		 *     amount of bytes both sent and completed in the last
+		 *     interval, plus any previous over-limit.
+		 */
+		limit += POSDIFF(completed, dql->prev_num_queued) +
+		     dql->prev_ovlimit;
+		dql->slack_start_time = jiffies;
+		dql->lowest_slack = UINT_MAX;
+	} else if (inprogress && prev_inprogress && !all_prev_completed) {
+		/*
+		 * Queue was not starved, check if the limit can be decreased.
+		 * A decrease is only considered if the queue has been busy in
+		 * the whole interval (the check above).
+		 *
+		 * If there is slack, the amount of excess data queued above
+		 * the amount needed to prevent starvation, the queue limit
+		 * can be decreased.  To avoid hysteresis we consider the
+		 * minimum amount of slack found over several iterations of the
+		 * completion routine.
+		 */
+		unsigned int slack, slack_last_objs;
+
+		/*
+		 * Slack is the maximum of
+		 *   - The queue limit plus previous over-limit minus twice
+		 *     the number of objects completed.  Note that two times
+		 *     the number of completed bytes is a basis for an upper
+		 *     bound of the limit.
+		 *   - Portion of objects in the last queuing operation that
+		 *     was not part of non-zero previous over-limit.  That is,
+		 *     "round down" by the non-overlimit portion of the last
+		 *     queueing operation.
+		 */
+		slack = POSDIFF(limit + dql->prev_ovlimit,
+		    2 * (completed - dql->num_completed));
+		slack_last_objs = dql->prev_ovlimit ?
+		    POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
+
+		slack = max(slack, slack_last_objs);
+
+		if (slack < dql->lowest_slack)
+			dql->lowest_slack = slack;
+
+		if (time_after(jiffies,
+			       dql->slack_start_time + dql->slack_hold_time)) {
+			limit = POSDIFF(limit, dql->lowest_slack);
+			dql->slack_start_time = jiffies;
+			dql->lowest_slack = UINT_MAX;
+		}
+	}
+
+	/* Enforce bounds on limit */
+	limit = clamp(limit, dql->min_limit, dql->max_limit);
+
+	if (limit != dql->limit) {
+		dql->limit = limit;
+		ovlimit = 0;
+	}
+
+	dql->adj_limit = limit + completed;
+	dql->prev_ovlimit = ovlimit;
+	dql->prev_last_obj_cnt = dql->last_obj_cnt;
+	dql->num_completed = completed;
+	dql->prev_num_queued = dql->num_queued;
+}
+EXPORT_SYMBOL(dql_completed);
+
+void dql_reset(struct dql *dql)
+{
+	/* Reset all dynamic values */
+	dql->limit = 0;
+	dql->num_queued = 0;
+	dql->num_completed = 0;
+	dql->last_obj_cnt = 0;
+	dql->prev_num_queued = 0;
+	dql->prev_last_obj_cnt = 0;
+	dql->prev_ovlimit = 0;
+	dql->lowest_slack = UINT_MAX;
+	dql->slack_start_time = jiffies;
+}
+EXPORT_SYMBOL(dql_reset);
+
+int dql_init(struct dql *dql, unsigned hold_time)
+{
+	dql->max_limit = DQL_MAX_LIMIT;
+	dql->min_limit = 0;
+	dql->slack_hold_time = hold_time;
+	dql_reset(dql);
+	return 0;
+}
+EXPORT_SYMBOL(dql_init);
--
cgit v1.2.3
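A worked pass through dql_completed above, with assumed numbers (editorial,
not from the patch). Increase: with limit = 100, num_completed = 1000 and
prev_num_queued = 1100, suppose the driver has queued up to num_queued = 1150
and a completion pass then retires count = 150. Then ovlimit =
POSDIFF(1150 - 1000, 100) = 50 and inprogress = 0, so the starved branch
fires and the limit grows by POSDIFF(1150, 1100) + prev_ovlimit, i.e. by 50
or more; successive starved intervals can keep growing it geometrically.
Decrease: with limit = 200, prev_ovlimit = 0 and a steady 25 completions per
busy interval, slack = POSDIFF(200 + 0, 2 * 25) = 150; if that remains the
minimum observed for slack_hold_time, the limit is cut to 200 - 150 = 50,
i.e. twice the per-interval completion count.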
From 8af2a218de38f51ea4b4fa48cac1273319ae260c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 8 Dec 2011 06:06:03 +0000
Subject: sch_red: Adaptative RED AQM

Adaptative RED AQM for Linux, based on the paper by Sally Floyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001:
http://icir.org/floyd/papers/adaptiveRed.pdf

The goal of Adaptative RED is to make max_p a dynamic value between 1%
and 50%, so that the average queue length reaches the target
min_th + (max_th - min_th) / 2:

Every 500 ms:
 if (avg > target and max_p <= 0.5)
  increase max_p : max_p += alpha;
 else if (avg < target and max_p >= 0.01)
  decrease max_p : max_p *= beta;

target : [min_th + 0.4*(max_th - min_th), min_th + 0.6*(max_th - min_th)]
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with a 32-bit mantissa)

Changes against our RED implementation are:

max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow the full range described in the Adaptative
RED paper.

To deliver a random number, we now use a reciprocal divide (that's
really a multiply), but this operation is done once per marked/dropped
packet when in the RED_BETWEEN_TRESH window, so the added cost (compared
to the previous AND operation) is near zero.

The dump operation gives the current max_p value in a new TCA_RED_MAX_P
attribute.

Example on a 10Mbit link:

tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
   limit 400000 min 30000 max 90000 avpkt 1000 \
   burst 55 ecn adaptative bandwidth 10Mbit

# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
 Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
 rate 9749Kbit 831pps backlog 72056b 16p requeues 0
  marked 1357 early 35 pdrop 0 other 0
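To make the Q0.32 convention concrete, here is a small standalone sketch of
the two update steps described above (editorial illustration in userspace C,
not from the patch; the kernel versions use RED_ONE_PERCENT and MAX_P_ALPHA
as shown in the diff below):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* Q0.32: a value v represents the fraction v / 2^32. */
		uint32_t one_percent = (uint32_t)((1ULL << 32) / 100);
		uint32_t max_p = 10 * one_percent;	/* assume max_p starts at 10% */

		/* Increase step: max_p += alpha, alpha = min(1%, max_p / 4). */
		uint32_t alpha = (max_p / 4 < one_percent) ? max_p / 4 : one_percent;
		printf("increase: %.4f\n", (max_p + alpha) / 4294967296.0); /* ~0.11 */

		/* Decrease step: max_p *= 0.9, done in integers as (max_p / 10) * 9. */
		printf("decrease: %.4f\n", ((max_p / 10) * 9) / 4294967296.0); /* ~0.09 */
		return 0;
	}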
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/pkt_sched.h |   6 ++-
 include/net/red.h         | 101 ++++++++++++++++++++++++++++++++++++++--------
 lib/reciprocal_div.c      |   2 +
 net/sched/sch_red.c       |  21 ++++++++++
 4 files changed, 111 insertions(+), 19 deletions(-)

(limited to 'lib')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index fb556dc594d..e41e0d4de24 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -181,6 +181,7 @@ enum {
 	TCA_RED_UNSPEC,
 	TCA_RED_PARMS,
 	TCA_RED_STAB,
+	TCA_RED_MAX_P,
 	__TCA_RED_MAX,
 };
 
@@ -194,8 +195,9 @@ struct tc_red_qopt {
 	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min)) */
 	unsigned char   Scell_log;	/* cell size for idle damping */
 	unsigned char	flags;
-#define TC_RED_ECN	1
-#define TC_RED_HARDDROP	2
+#define TC_RED_ECN		1
+#define TC_RED_HARDDROP		2
+#define TC_RED_ADAPTATIVE	4
 };
 
 struct tc_red_xstats {
diff --git a/include/net/red.h b/include/net/red.h
index b72a3b83393..24606b22d01 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
+#include <linux/reciprocal_div.h>
 
 /*	Random Early Detection (RED) algorithm.
 	=======================================
@@ -87,6 +88,29 @@
 	etc.
  */
 
+/*
+ * Adaptative RED : An Algorithm for Increasing the Robustness of RED's AQM
+ * (Sally Floyd, Ramakrishna Gummadi, and Scott Shenker) August 2001
+ *
+ * Every 500 ms:
+ *  if (avg > target and max_p <= 0.5)
+ *   increase max_p : max_p += alpha;
+ *  else if (avg < target and max_p >= 0.01)
+ *   decrease max_p : max_p *= beta;
+ *
+ * target :[qth_min + 0.4*(qth_max - qth_min),
+ *          qth_min + 0.6*(qth_max - qth_min)].
+ * alpha : min(0.01, max_p / 4)
+ * beta : 0.9
+ * max_P is a Q0.32 fixed point number (with a 32-bit mantissa)
+ * max_P between 0.01 and 0.5 (1% - 50%) [ It's no longer a negative power of two ]
+ */
+#define RED_ONE_PERCENT ((u32)DIV_ROUND_CLOSEST(1ULL<<32, 100))
+
+#define MAX_P_MIN (1 * RED_ONE_PERCENT)
+#define MAX_P_MAX (50 * RED_ONE_PERCENT)
+#define MAX_P_ALPHA(val) min(MAX_P_MIN, val / 4)
+
 #define RED_STAB_SIZE	256
 #define RED_STAB_MASK	(RED_STAB_SIZE - 1)
 
@@ -101,10 +125,14 @@ struct red_stats {
 
 struct red_parms {
 	/* Parameters */
-	u32		qth_min;	/* Min avg length threshold: A scaled */
-	u32		qth_max;	/* Max avg length threshold: A scaled */
+	u32		qth_min;	/* Min avg length threshold: Wlog scaled */
+	u32		qth_max;	/* Max avg length threshold: Wlog scaled */
 	u32		Scell_max;
-	u32		Rmask;		/* Cached random mask, see red_rmask */
+	u32		max_P;		/* probability, [0 .. 1.0] 32 scaled */
+	u32		max_P_reciprocal; /* reciprocal_value(max_P / qth_delta) */
+	u32		qth_delta;	/* max_th - min_th */
+	u32		target_min;	/* min_th + 0.4*(max_th - min_th) */
+	u32		target_max;	/* min_th + 0.6*(max_th - min_th) */
 	u8		Scell_log;
 	u8		Wlog;		/* log(W)		*/
 	u8		Plog;		/* random number bits	*/
@@ -115,19 +143,22 @@ struct red_parms {
 					   number generation */
 	u32		qR;		/* Cached random number */
 
-	unsigned long	qavg;		/* Average queue length: A scaled */
+	unsigned long	qavg;		/* Average queue length: Wlog scaled */
 	ktime_t		qidlestart;	/* Start of current idle period */
 };
 
-static inline u32 red_rmask(u8 Plog)
+static inline u32 red_maxp(u8 Plog)
 {
-	return Plog < 32 ? ((1 << Plog) - 1) : ~0UL;
+	return Plog < 32 ? (~0U >> Plog) : ~0U;
 }
 
+
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
 				 u8 Scell_log, u8 *stab)
 {
+	int delta = qth_max - qth_min;
+
 	/* Reset average queue length, the value is strictly bound
 	 * to the parameters below, reseting hurts a bit but leaving
 	 * it might result in an unreasonable qavg for a while. --TGR
@@ -139,14 +170,29 @@ static inline void red_set_parms(struct red_parms *p,
 	p->qth_max	= qth_max << Wlog;
 	p->Wlog		= Wlog;
 	p->Plog		= Plog;
-	p->Rmask	= red_rmask(Plog);
+	if (delta < 0)
+		delta = 1;
+	p->qth_delta	= delta;
+	p->max_P	= red_maxp(Plog);
+	p->max_P	*= delta; /* max_P = (qth_max-qth_min)/2^Plog */
+
+	p->max_P_reciprocal = reciprocal_value(p->max_P / delta);
+
+	/* RED Adaptative target :
+	 * [min_th + 0.4*(max_th - min_th),
+	 *  min_th + 0.6*(max_th - min_th)].
+	 */
+	delta /= 5;
+	p->target_min = qth_min + 2*delta;
+	p->target_max = qth_min + 3*delta;
+
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
 	memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
-static inline int red_is_idling(struct red_parms *p)
+static inline int red_is_idling(const struct red_parms *p)
 {
 	return p->qidlestart.tv64 != 0;
 }
@@ -168,7 +214,7 @@ static inline void red_restart(struct red_parms *p)
 	p->qcount	= -1;
 }
 
-static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
+static inline unsigned long red_calc_qavg_from_idle_time(const struct red_parms *p)
 {
 	s64 delta = ktime_us_delta(ktime_get(), p->qidlestart);
 	long us_idle = min_t(s64, delta, p->Scell_max);
@@ -215,7 +261,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
 	}
 }
 
-static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
+static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p,
 						       unsigned int backlog)
 {
 	/*
@@ -230,7 +276,7 @@ static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
 	return p->qavg + (backlog - (p->qavg >> p->Wlog));
 }
 
-static inline unsigned long red_calc_qavg(struct red_parms *p,
+static inline unsigned long red_calc_qavg(const struct red_parms *p,
 					  unsigned int backlog)
 {
 	if (!red_is_idling(p))
@@ -239,23 +285,24 @@ static inline unsigned long red_calc_qavg(struct red_parms *p,
 	return red_calc_qavg_from_idle_time(p);
 }
 
-static inline u32 red_random(struct red_parms *p)
+
+static inline u32 red_random(const struct red_parms *p)
 {
-	return net_random() & p->Rmask;
+	return reciprocal_divide(net_random(), p->max_P_reciprocal);
 }
 
-static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
+static inline int red_mark_probability(const struct red_parms *p, unsigned long qavg)
 {
 	/* The formula used below causes questions.
 
-	   OK. qR is random number in the interval 0..Rmask
+	   OK. qR is random number in the interval
+		(0..1/max_P)*(qth_max-qth_min)
 	   i.e. 0..(2^Plog). If we used floating point
 	   arithmetics, it would be: (2^Plog)*rnd_num,
 	   where rnd_num is less 1.
 
 	   Taking into account, that qavg have fixed
-	   point at Wlog, and Plog is related to max_P by
-	   max_P = (qth_max-qth_min)/2^Plog; two lines
+	   point at Wlog, two lines
 	   below have the following floating point equivalent:
 
 	   max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount
@@ -315,4 +362,24 @@ static inline int red_action(struct red_parms *p, unsigned long qavg)
 	return RED_DONT_MARK;
 }
 
+static inline void red_adaptative_algo(struct red_parms *p)
+{
+	unsigned long qavg;
+	u32 max_p_delta;
+
+	qavg = p->qavg;
+	if (red_is_idling(p))
+		qavg = red_calc_qavg_from_idle_time(p);
+
+	/* p->qavg is fixed point number with point at Wlog */
+	qavg >>= p->Wlog;
+
+	if (qavg > p->target_max && p->max_P <= MAX_P_MAX)
+		p->max_P += MAX_P_ALPHA(p->max_P); /* maxp = maxp + alpha */
+	else if (qavg < p->target_min && p->max_P >= MAX_P_MIN)
+		p->max_P = (p->max_P/10)*9; /* maxp = maxp * Beta */
+
+	max_p_delta = DIV_ROUND_CLOSEST(p->max_P, p->qth_delta);
+	p->max_P_reciprocal = reciprocal_value(max_p_delta);
+}
 #endif
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index 6a3bd48fa2a..75510e94f7d 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,5 +1,6 @@
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
+#include <linux/export.h>
 
 u32 reciprocal_value(u32 k)
 {
@@ -7,3 +8,4 @@ u32 reciprocal_value(u32 k)
 	do_div(val, k);
 	return (u32)val;
 }
+EXPORT_SYMBOL(reciprocal_value);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d617161f8dd..8f5a85bf9d1 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -39,6 +39,7 @@ struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
+	struct timer_list	adapt_timer;
 	struct red_parms	parms;
 	struct red_stats	stats;
 	struct Qdisc		*qdisc;
 };
@@ -161,6 +162,8 @@ static void red_reset(struct Qdisc *sch)
 static void red_destroy(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
+
+	del_timer_sync(&q->adapt_timer);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -209,6 +212,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 			 ctl->Plog, ctl->Scell_log,
 			 nla_data(tb[TCA_RED_STAB]));
 
+	del_timer(&q->adapt_timer);
+	if (ctl->flags & TC_RED_ADAPTATIVE)
+		mod_timer(&q->adapt_timer, jiffies + HZ/2);
+
 	if (!q->qdisc->q.qlen)
 		red_start_of_idle_period(&q->parms);
 
@@ -216,11 +223,24 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
+static inline void red_adaptative_timer(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	struct red_sched_data *q = qdisc_priv(sch);
+	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+	spin_lock(root_lock);
+	red_adaptative_algo(&q->parms);
+	mod_timer(&q->adapt_timer, jiffies + HZ/2);
+	spin_unlock(root_lock);
+}
+
 static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	q->qdisc = &noop_qdisc;
+	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
 	return red_change(sch, opt);
 }
 
@@ -243,6 +263,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
--
cgit v1.2.3
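Finally, since the series leans on reciprocal division, here is a standalone
sketch of what reciprocal_value()/reciprocal_divide() compute (editorial
illustration in userspace C, mirroring lib/reciprocal_div.c and
linux/reciprocal_div.h: a division by k becomes a 64-bit multiply plus a
shift, which is why red_random() above is cheap):

	#include <stdint.h>
	#include <stdio.h>

	/* Precompute ceil(2^32 / k), as lib/reciprocal_div.c does. */
	static uint32_t reciprocal_value(uint32_t k)
	{
		uint64_t val = (1ULL << 32) + (k - 1);
		return (uint32_t)(val / k);
	}

	/* a / k computed as a multiply and a shift. */
	static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
	{
		return (uint32_t)(((uint64_t)a * r) >> 32);
	}

	int main(void)
	{
		uint32_t r = reciprocal_value(1000);

		printf("%u\n", reciprocal_divide(123456, r));	/* prints 123 */
		return 0;
	}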