summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/ip6_fib.c9
-rw-r--r--net/ipv6/ip6_output.c40
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/mcast.c28
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c22
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c2
-rw-r--r--net/ipv6/output_core.c28
-rw-r--r--net/ipv6/reassembly.c4
-rw-r--r--net/ipv6/route.c139
-rw-r--r--net/ipv6/seg6_local.c94
-rw-r--r--net/ipv6/sit.c9
-rw-r--r--net/ipv6/sysctl_net_ipv6.c31
-rw-r--r--net/ipv6/tcp_ipv6.c16
-rw-r--r--net/ipv6/udp.c3
17 files changed, 337 insertions, 103 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b0ef65eb9bd2..3bf685fe64b9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
- BUG();
+ return -EINVAL;
if (tb[IFLA_INET6_TOKEN]) {
err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
@@ -6903,10 +6903,10 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "addr_gen_mode",
- .data = &ipv6_devconf.addr_gen_mode,
- .maxlen = sizeof(int),
- .mode = 0644,
+ .procname = "addr_gen_mode",
+ .data = &ipv6_devconf.addr_gen_mode,
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = addrconf_sysctl_addr_gen_mode,
},
{
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8f9a83314de7..40f3e4f9f33a 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -467,7 +467,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
static int __net_init fib6_rules_net_init(struct net *net)
{
struct fib_rules_ops *ops;
- int err = -ENOMEM;
+ int err;
ops = fib_rules_register(&fib6_rules_ops_template, net);
if (IS_ERR(ops))
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 679699e953f1..2d650dc24349 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -32,6 +32,7 @@
#include <net/lwtunnel.h>
#include <net/fib_notifier.h>
+#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -2355,6 +2356,10 @@ static int __net_init fib6_net_init(struct net *net)
if (err)
return err;
+ /* Default to 3-tuple */
+ net->ipv6.sysctl.multipath_hash_fields =
+ FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
+
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
@@ -2362,7 +2367,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
if (!net->ipv6.rt6_stats)
- goto out_timer;
+ goto out_notifier;
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
@@ -2407,7 +2412,7 @@ out_fib_table_hash:
kfree(net->ipv6.fib_table_hash);
out_rt6_stats:
kfree(net->ipv6.rt6_stats);
-out_timer:
+out_notifier:
fib6_notifier_exit(net);
return -ENOMEM;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ff4f9ebcf7f6..984050f35c61 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1055,13 +1055,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* ip6_route_output will fail given src=any saddr, though, so
* that's why we try it again later.
*/
- if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+ if (ipv6_addr_any(&fl6->saddr)) {
struct fib6_info *from;
struct rt6_info *rt;
- bool had_dst = *dst != NULL;
- if (!had_dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output(net, sk, fl6);
rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
rcu_read_lock();
@@ -1078,7 +1076,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* never existed and let the SA-enabled version take
* over.
*/
- if (!had_dst && (*dst)->error) {
+ if ((*dst)->error) {
dst_release(*dst);
*dst = NULL;
}
@@ -1555,7 +1553,7 @@ emsgsize:
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
@@ -1582,17 +1580,28 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len;
+ alloc_extra += dst_exthdrlen;
+ alloc_extra += rt->dst.trailer_len;
+
+ /* We just reserve space for fragment header.
+ * Note: this may be overallocation if the message
+ * (without MSG_MORE) fits into the MTU.
+ */
+ alloc_extra += sizeof(struct frag_hdr);
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
-
- alloclen += dst_exthdrlen;
+ alloclen += alloc_extra;
if (datalen != length + fraggap) {
/*
@@ -1602,30 +1611,21 @@ alloc_new_skb:
datalen += rt->dst.trailer_len;
}
- alloclen += rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
- /*
- * We just reserve space for fragment header.
- * Note: this may be overallocation if the message
- * (without MSG_MORE) fits into the MTU.
- */
- alloclen += sizeof(struct frag_hdr);
-
copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 288bafded998..0b8a38687ce4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
skb->dev = tunnel->dev;
+ skb_reset_mac_header(skb);
}
skb_reset_network_header(skb);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 0d59efb6b49e..54ec163fbafa 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1729,26 +1729,26 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
{
+ u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT,
+ 2, 0, 0, IPV6_TLV_PADN, 0 };
struct net_device *dev = idev->dev;
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.igmp_sk;
- struct sk_buff *skb;
- struct mld2_report *pmr;
- struct in6_addr addr_buf;
- const struct in6_addr *saddr;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu + hlen + tlen;
+ struct net *net = dev_net(dev);
+ const struct in6_addr *saddr;
+ struct in6_addr addr_buf;
+ struct mld2_report *pmr;
+ struct sk_buff *skb;
+ unsigned int size;
+ struct sock *sk;
int err;
- u8 ra[8] = { IPPROTO_ICMPV6, 0,
- IPV6_TLV_ROUTERALERT, 2, 0, 0,
- IPV6_TLV_PADN, 0 };
- /* we assume size > sizeof(ra) here */
- /* limit our allocations to order-0 page */
- size = min_t(int, size, SKB_MAX_ORDER(0, 0));
+ sk = net->ipv6.igmp_sk;
+ /* we assume size > sizeof(ra) here
+ * Also try to not allocate high-order pages for big MTU
+ */
+ size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
skb = sock_alloc_send_skb(sk, size, 1, &err);
-
if (!skb)
return NULL;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e810a23baf99..de2cf3943b91 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -51,7 +51,7 @@ ip6_packet_match(const struct sk_buff *skb,
const char *outdev,
const struct ip6t_ip6 *ip6info,
unsigned int *protoff,
- int *fragoff, bool *hotdrop)
+ u16 *fragoff, bool *hotdrop)
{
unsigned long ret;
const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index e204163c7036..92f3235fa287 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
}
EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph)
+{
+ if (likely(next != IPPROTO_ICMPV6))
+ return false;
+
+ if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
+ return false;
+
+ return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
+}
+
void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
@@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
- nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv, nft_in(pkt));
- return;
+ if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
+ nft_hook(pkt) == NF_INET_INGRESS) {
+ if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
+ nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
+ }
}
*dest = 0;
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 7969d1f3018d..ed69c768797e 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -28,7 +28,7 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(nft_net(pkt), pkt->xt.state->sk, pkt->skb,
+ nf_send_reset6(nft_net(pkt), nft_sk(pkt), pkt->skb,
nft_hook(pkt));
break;
default:
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index af36acc1a644..2880dc7d9a49 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -15,29 +15,11 @@ static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
- const struct {
- struct in6_addr dst;
- struct in6_addr src;
- } __aligned(SIPHASH_ALIGNMENT) combined = {
- .dst = *dst,
- .src = *src,
- };
- u32 hash, id;
-
- /* Note the following code is not safe, but this is okay. */
- if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
- get_random_bytes(&net->ipv4.ip_id_key,
- sizeof(net->ipv4.ip_id_key));
-
- hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
-
- /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
- * set the hight order instead thus minimizing possible future
- * collisions.
- */
- id = ip_idents_reserve(hash, 1);
- if (unlikely(!id))
- id = 1 << 31;
+ u32 id;
+
+ do {
+ id = prandom_u32();
+ } while (!id);
return id;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 47a0dc46cbdb..28e44782c94d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -343,7 +343,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (!(fhdr->frag_off & htons(0xFFF9))) {
+ if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) {
/* It is not a fragmented frame */
skb->transport_header += sizeof(struct frag_hdr);
__IP6_INC_STATS(net,
@@ -351,6 +351,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) +
+ sizeof(struct ipv6hdr);
return 1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a22822bdbf39..7b756a7dc036 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2326,12 +2326,131 @@ out:
}
}
+static u32 rt6_multipath_custom_hash_outer(const struct net *net,
+ const struct sk_buff *skb,
+ bool *p_has_inner)
+{
+ u32 hash_fields = ip6_multipath_hash_fields(net);
+ struct flow_keys keys, hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+ hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+ hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
+ hash_keys.tags.flow_label = keys.tags.flow_label;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+ hash_keys.ports.src = keys.ports.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+ hash_keys.ports.dst = keys.ports.dst;
+
+ *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
+ return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 rt6_multipath_custom_hash_inner(const struct net *net,
+ const struct sk_buff *skb,
+ bool has_inner)
+{
+ u32 hash_fields = ip6_multipath_hash_fields(net);
+ struct flow_keys keys, hash_keys;
+
+ /* We assume the packet carries an encapsulation, but if none was
+ * encountered during dissection of the outer flow, then there is no
+ * point in calling the flow dissector again.
+ */
+ if (!has_inner)
+ return 0;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+ if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
+ return 0;
+
+ if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+ hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+ hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
+ hash_keys.tags.flow_label = keys.tags.flow_label;
+ }
+
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
+ hash_keys.ports.src = keys.ports.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+ hash_keys.ports.dst = keys.ports.dst;
+
+ return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 rt6_multipath_custom_hash_skb(const struct net *net,
+ const struct sk_buff *skb)
+{
+ u32 mhash, mhash_inner;
+ bool has_inner = true;
+
+ mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
+ mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
+
+ return jhash_2words(mhash, mhash_inner, 0);
+}
+
+static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
+ const struct flowi6 *fl6)
+{
+ u32 hash_fields = ip6_multipath_hash_fields(net);
+ struct flow_keys hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
+ hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+ hash_keys.ports.src = fl6->fl6_sport;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+ hash_keys.ports.dst = fl6->fl6_dport;
+
+ return flow_hash_from_keys(&hash_keys);
+}
+
/* if skb is set it will be used and fl6 can be NULL */
u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
- u32 mhash;
+ u32 mhash = 0;
switch (ip6_multipath_hash_policy(net)) {
case 0:
@@ -2345,6 +2464,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
+ mhash = flow_hash_from_keys(&hash_keys);
break;
case 1:
if (skb) {
@@ -2376,6 +2496,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
+ mhash = flow_hash_from_keys(&hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2412,9 +2533,15 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
+ mhash = flow_hash_from_keys(&hash_keys);
+ break;
+ case 3:
+ if (skb)
+ mhash = rt6_multipath_custom_hash_skb(net, skb);
+ else
+ mhash = rt6_multipath_custom_hash_fl6(net, fl6);
break;
}
- mhash = flow_hash_from_keys(&hash_keys);
return mhash >> 1;
}
@@ -3673,11 +3800,11 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (nh) {
if (rt->fib6_src.plen) {
NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
- goto out;
+ goto out_free;
}
if (!nexthop_get(nh)) {
NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
- goto out;
+ goto out_free;
}
rt->nh = nh;
fib6_nh = nexthop_fib6_nh(rt->nh);
@@ -3714,6 +3841,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
out:
fib6_info_release(rt);
return ERR_PTR(err);
+out_free:
+ ip_fib_metrics_put(rt->fib6_metrics);
+ kfree(rt);
+ return ERR_PTR(err);
}
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 4ff38cb08f4b..60bf3b877957 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -87,10 +87,10 @@ struct seg6_end_dt_info {
int vrf_ifindex;
int vrf_table;
- /* tunneled packet proto and family (IPv4 or IPv6) */
- __be16 proto;
+ /* tunneled packet family (IPv4 or IPv6).
+ * Protocol and header length are inferred from family.
+ */
u16 family;
- int hdrlen;
};
struct pcpu_seg6_local_counters {
@@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
info->net = net;
info->vrf_ifindex = vrf_ifindex;
- switch (family) {
- case AF_INET:
- info->proto = htons(ETH_P_IP);
- info->hdrlen = sizeof(struct iphdr);
- break;
- case AF_INET6:
- info->proto = htons(ETH_P_IPV6);
- info->hdrlen = sizeof(struct ipv6hdr);
- break;
- default:
- return -EINVAL;
- }
-
info->family = family;
info->mode = DT_VRF_MODE;
@@ -622,22 +609,44 @@ error:
}
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+ struct seg6_local_lwt *slwt, u16 family)
{
struct seg6_end_dt_info *info = &slwt->dt_info;
struct net_device *vrf;
+ __be16 protocol;
+ int hdrlen;
vrf = end_dt_get_vrf_rcu(skb, info);
if (unlikely(!vrf))
goto drop;
- skb->protocol = info->proto;
+ switch (family) {
+ case AF_INET:
+ protocol = htons(ETH_P_IP);
+ hdrlen = sizeof(struct iphdr);
+ break;
+ case AF_INET6:
+ protocol = htons(ETH_P_IPV6);
+ hdrlen = sizeof(struct ipv6hdr);
+ break;
+ case AF_UNSPEC:
+ fallthrough;
+ default:
+ goto drop;
+ }
+
+ if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
+ pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
+ goto drop;
+ }
+
+ skb->protocol = protocol;
skb_dst_drop(skb);
- skb_set_transport_header(skb, info->hdrlen);
+ skb_set_transport_header(skb, hdrlen);
- return end_dt_vrf_rcv(skb, info->family, vrf);
+ return end_dt_vrf_rcv(skb, family, vrf);
drop:
kfree_skb(skb);
@@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
goto legacy_mode;
/* DT6_VRF_MODE */
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET6);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -767,6 +776,36 @@ drop:
return -EINVAL;
}
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
+}
+
+static int input_action_end_dt46(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ unsigned int off = 0;
+ int nexthdr;
+
+ nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
+ if (unlikely(nexthdr < 0))
+ goto drop;
+
+ switch (nexthdr) {
+ case IPPROTO_IPIP:
+ return input_action_end_dt4(skb, slwt);
+ case IPPROTO_IPV6:
+ return input_action_end_dt6(skb, slwt);
+ }
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+#endif
+
/* push an SRH on top of the current one */
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
@@ -969,6 +1008,17 @@ static struct seg6_action_desc seg6_action_table[] = {
.input = input_action_end_dt6,
},
{
+ .action = SEG6_LOCAL_ACTION_END_DT46,
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .input = input_action_end_dt46,
+ .slwt_ops = {
+ .build_state = seg6_end_dt46_build,
+ },
+#endif
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
.optattrs = SEG6_F_LOCAL_COUNTERS,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index aa98294a3ad3..df5bea818410 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -271,6 +271,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
if (ipip6_tunnel_create(dev) < 0)
goto failed_free;
+ if (!parms->name[0])
+ strcpy(parms->name, dev->name);
+
return nt;
failed_free:
@@ -707,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb)
* old iph is no longer valid
*/
iph = (const struct iphdr *)skb_mac_header(skb);
+ skb_reset_mac_header(skb);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -777,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tpi = &ipip_tpi;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
+ skb_reset_mac_header(skb);
+
return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
@@ -970,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (df) {
mtu = dst_mtu(&rt->dst) - t_hlen;
- if (mtu < 68) {
+ if (mtu < IPV4_MIN_MTU) {
dev->stats.collisions++;
ip_rt_put(rt);
goto tx_error;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 27102c3d6e1d..d7cf26f730d7 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,13 +17,17 @@
#include <net/addrconf.h>
#include <net/inet_frag.h>
#include <net/netevent.h>
+#include <net/ip_fib.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
static int two = 2;
+static int three = 3;
static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static u32 rt6_multipath_hash_fields_all_mask =
+ FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -40,6 +44,22 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
return ret;
}
+static int
+proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_fields);
+ ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
+
static struct ctl_table ipv6_table_template[] = {
{
.procname = "bindv6only",
@@ -149,7 +169,16 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = &three,
+ },
+ {
+ .procname = "fib_multipath_hash_fields",
+ .data = &init_net.ipv6.sysctl.multipath_hash_fields,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_fields,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &rt6_multipath_hash_fields_all_mask,
},
{
.procname = "seg6_flowlabel",
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5f47c0b6e3de..4d71464094b3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1538,6 +1538,7 @@ discard:
kfree_skb(skb);
return 0;
csum_err:
+ trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1663,10 +1664,18 @@ process:
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
- inet_csk_reqsk_queue_drop_and_put(sk, req);
- goto lookup;
+ nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+ if (!nsk) {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ sk = nsk;
+ /* reuseport_migrate_sock() has already held one sk_refcnt
+ * before returning.
+ */
+ } else {
+ sock_hold(sk);
}
- sock_hold(sk);
refcounted = true;
nsk = NULL;
if (!tcp_filter(sk, skb)) {
@@ -1754,6 +1763,7 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
+ trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 199b080d418a..3fcd86f4dfdc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
lock_sock(sk);
+
+ /* protects from races with udp_abort() */
+ sock_set_flag(sk, SOCK_DEAD);
udp_v6_flush_pending_frames(sk);
release_sock(sk);