summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-08-31 12:34:00 -0700
committerDavid S. Miller <davem@davemloft.net>2015-08-31 12:34:00 -0700
commit9dc30648f0708cf063e29470d83f63f8dc8fc430 (patch)
treef12130b61955f4471ebd61474244ecf9ebdc0858 /net/ipv4
parent87583ebb9f6ea6dc7f8ef167b815656787e429fc (diff)
parentc3a8d9474684d391b0afc3970d9b249add15ec07 (diff)
Merge branch 'per-route-dctcp-receive-side'
Daniel Borkmann says: ==================== tcp: receive-side per route dctcp handling Original cover letter: Currently, the following case doesn't use DCTCP, even if it should: - responder has f.e. cubic as system wide default - 'ip route congctl dctcp $src' was set Then, DCTCP is NOT used if a DCTCP sender attempts to connect from a host in the $src range: ECT(0) is set, but listen_sk is not dctcp, so we fail the INET_ECN_is_not_ect sanity check. We also have to examine the dst used for the SYN/ACK reply to make this case work. In order to minimize additional cost, store the 'ecn is must have' information is the dst_features field. The set targets -next instead of -net since this doesn't seem to be a serious bug and to give the change more soak time until it hits linus tree. v1 -> v2: - Addressed Dave's feedback, not exposing any bits to user space - Added patch 3 to reject incorrect configurations - Rest as is, rebased and retested ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_semantics.c77
-rw-r--r--net/ipv4/tcp_cong.c9
-rw-r--r--net/ipv4/tcp_input.c7
3 files changed, 58 insertions, 35 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1b2d01170a4d..992a9597daf8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -876,6 +876,50 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
return true;
}
+static int
+fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
+{
+ bool ecn_ca = false;
+ struct nlattr *nla;
+ int remaining;
+
+ if (!cfg->fc_mx)
+ return 0;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla_type(nla);
+ u32 val;
+
+ if (!type)
+ continue;
+ if (type > RTAX_MAX)
+ return -EINVAL;
+
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ if (val == TCP_CA_UNSPEC)
+ return -EINVAL;
+ } else {
+ val = nla_get_u32(nla);
+ }
+ if (type == RTAX_ADVMSS && val > 65535 - 40)
+ val = 65535 - 40;
+ if (type == RTAX_MTU && val > 65535 - 15)
+ val = 65535 - 15;
+ if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+ return -EINVAL;
+ fi->fib_metrics[type - 1] = val;
+ }
+
+ if (ecn_ca)
+ fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+
+ return 0;
+}
+
struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
@@ -948,36 +992,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
} endfor_nexthops(fi)
- if (cfg->fc_mx) {
- struct nlattr *nla;
- int remaining;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
-
- if (type) {
- u32 val;
-
- if (type > RTAX_MAX)
- goto err_inval;
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
-
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp);
- if (val == TCP_CA_UNSPEC)
- goto err_inval;
- } else {
- val = nla_get_u32(nla);
- }
- if (type == RTAX_ADVMSS && val > 65535 - 40)
- val = 65535 - 40;
- if (type == RTAX_MTU && val > 65535 - 15)
- val = 65535 - 15;
- fi->fib_metrics[type - 1] = val;
- }
- }
- }
+ err = fib_convert_metrics(fi, cfg);
+ if (err)
+ goto failure;
if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index a2ed23c595cf..93c4dc3ab23f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -114,16 +114,19 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
-u32 tcp_ca_get_key_by_name(const char *name)
+u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
{
const struct tcp_congestion_ops *ca;
- u32 key;
+ u32 key = TCP_CA_UNSPEC;
might_sleep();
rcu_read_lock();
ca = __tcp_ca_find_autoload(name);
- key = ca ? ca->key : TCP_CA_UNSPEC;
+ if (ca) {
+ key = ca->key;
+ *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
+ }
rcu_read_unlock();
return key;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dc08e2352665..a8f515bb19c4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6003,14 +6003,17 @@ static void tcp_ecn_create_request(struct request_sock *req,
const struct net *net = sock_net(listen_sk);
bool th_ecn = th->ece && th->cwr;
bool ect, ecn_ok;
+ u32 ecn_ok_dst;
if (!th_ecn)
return;
ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
- ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
+ ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
+ ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
- if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))
+ if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
+ (ecn_ok_dst & DST_FEATURE_ECN_CA))
inet_rsk(req)->ecn_ok = 1;
}