diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 10 | ||||
-rw-r--r-- | net/ipv6/fib6_rules.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 9 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 40 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 1 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 28 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 2 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_fib_ipv6.c | 22 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_reject_ipv6.c | 2 | ||||
-rw-r--r-- | net/ipv6/output_core.c | 28 | ||||
-rw-r--r-- | net/ipv6/reassembly.c | 4 | ||||
-rw-r--r-- | net/ipv6/route.c | 139 | ||||
-rw-r--r-- | net/ipv6/seg6_local.c | 94 | ||||
-rw-r--r-- | net/ipv6/sit.c | 9 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 31 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 16 | ||||
-rw-r--r-- | net/ipv6/udp.c | 3 |
17 files changed, 337 insertions, 103 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b0ef65eb9bd2..3bf685fe64b9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL; if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), @@ -6903,10 +6903,10 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { - .procname = "addr_gen_mode", - .data = &ipv6_devconf.addr_gen_mode, - .maxlen = sizeof(int), - .mode = 0644, + .procname = "addr_gen_mode", + .data = &ipv6_devconf.addr_gen_mode, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = addrconf_sysctl_addr_gen_mode, }, { diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8f9a83314de7..40f3e4f9f33a 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -467,7 +467,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { static int __net_init fib6_rules_net_init(struct net *net) { struct fib_rules_ops *ops; - int err = -ENOMEM; + int err; ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 679699e953f1..2d650dc24349 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -32,6 +32,7 @@ #include <net/lwtunnel.h> #include <net/fib_notifier.h> +#include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -2355,6 +2356,10 @@ static int __net_init fib6_net_init(struct net *net) if (err) return err; + /* Default to 3-tuple */ + net->ipv6.sysctl.multipath_hash_fields = + FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; + spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); INIT_LIST_HEAD(&net->ipv6.fib6_walkers); @@ -2362,7 +2367,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) - goto out_timer; + goto out_notifier; /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); @@ -2407,7 +2412,7 @@ out_fib_table_hash: kfree(net->ipv6.fib_table_hash); out_rt6_stats: kfree(net->ipv6.rt6_stats); -out_timer: +out_notifier: fib6_notifier_exit(net); return -ENOMEM; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ff4f9ebcf7f6..984050f35c61 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1055,13 +1055,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * ip6_route_output will fail given src=any saddr, though, so * that's why we try it again later. */ - if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { + if (ipv6_addr_any(&fl6->saddr)) { struct fib6_info *from; struct rt6_info *rt; - bool had_dst = *dst != NULL; - if (!had_dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; rcu_read_lock(); @@ -1078,7 +1076,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * never existed and let the SA-enabled version take * over. */ - if (!had_dst && (*dst)->error) { + if ((*dst)->error) { dst_release(*dst); *dst = NULL; } @@ -1555,7 +1553,7 @@ emsgsize: unsigned int datalen; unsigned int fraglen; unsigned int fraggap; - unsigned int alloclen; + unsigned int alloclen, alloc_extra; unsigned int pagedlen; alloc_new_skb: /* There's no room in the current skb */ @@ -1582,17 +1580,28 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; pagedlen = 0; + alloc_extra = hh_len; + alloc_extra += dst_exthdrlen; + alloc_extra += rt->dst.trailer_len; + + /* We just reserve space for fragment header. + * Note: this may be overallocation if the message + * (without MSG_MORE) fits into the MTU. + */ + alloc_extra += sizeof(struct frag_hdr); + if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else if (!paged) + else if (!paged && + (fraglen + alloc_extra < SKB_MAX_ALLOC || + !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { alloclen = min_t(int, fraglen, MAX_HEADER); pagedlen = fraglen - alloclen; } - - alloclen += dst_exthdrlen; + alloclen += alloc_extra; if (datalen != length + fraggap) { /* @@ -1602,30 +1611,21 @@ alloc_new_skb: datalen += rt->dst.trailer_len; } - alloclen += rt->dst.trailer_len; fraglen = datalen + fragheaderlen; - /* - * We just reserve space for fragment header. - * Note: this may be overallocation if the message - * (without MSG_MORE) fits into the MTU. - */ - alloclen += sizeof(struct frag_hdr); - copy = datalen - transhdrlen - fraggap - pagedlen; if (copy < 0) { err = -EINVAL; goto error; } if (transhdrlen) { - skb = sock_alloc_send_skb(sk, - alloclen + hh_len, + skb = sock_alloc_send_skb(sk, alloclen, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 2 * sk->sk_sndbuf) - skb = alloc_skb(alloclen + hh_len, + skb = alloc_skb(alloclen, sk->sk_allocation); if (unlikely(!skb)) err = -ENOBUFS; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 288bafded998..0b8a38687ce4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; + skb_reset_mac_header(skb); } skb_reset_network_header(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 0d59efb6b49e..54ec163fbafa 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1729,26 +1729,26 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) { + u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, + 2, 0, 0, IPV6_TLV_PADN, 0 }; struct net_device *dev = idev->dev; - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; - struct sk_buff *skb; - struct mld2_report *pmr; - struct in6_addr addr_buf; - const struct in6_addr *saddr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - unsigned int size = mtu + hlen + tlen; + struct net *net = dev_net(dev); + const struct in6_addr *saddr; + struct in6_addr addr_buf; + struct mld2_report *pmr; + struct sk_buff *skb; + unsigned int size; + struct sock *sk; int err; - u8 ra[8] = { IPPROTO_ICMPV6, 0, - IPV6_TLV_ROUTERALERT, 2, 0, 0, - IPV6_TLV_PADN, 0 }; - /* we assume size > sizeof(ra) here */ - /* limit our allocations to order-0 page */ - size = min_t(int, size, SKB_MAX_ORDER(0, 0)); + sk = net->ipv6.igmp_sk; + /* we assume size > sizeof(ra) here + * Also try to not allocate high-order pages for big MTU + */ + size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen; skb = sock_alloc_send_skb(sk, size, 1, &err); - if (!skb) return NULL; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e810a23baf99..de2cf3943b91 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -51,7 +51,7 @@ ip6_packet_match(const struct sk_buff *skb, const char *outdev, const struct ip6t_ip6 *ip6info, unsigned int *protoff, - int *fragoff, bool *hotdrop) + u16 *fragoff, bool *hotdrop) { unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index e204163c7036..92f3235fa287 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, } EXPORT_SYMBOL_GPL(nft_fib6_eval_type); +static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph) +{ + if (likely(next != IPPROTO_ICMPV6)) + return false; + + if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY) + return false; + + return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; +} + void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { @@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && - nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, nft_in(pkt)); - return; + if (nft_hook(pkt) == NF_INET_PRE_ROUTING || + nft_hook(pkt) == NF_INET_INGRESS) { + if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || + nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { + nft_fib_store_result(dest, priv, nft_in(pkt)); + return; + } } *dest = 0; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 7969d1f3018d..ed69c768797e 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -28,7 +28,7 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(nft_net(pkt), pkt->xt.state->sk, pkt->skb, + nf_send_reset6(nft_net(pkt), nft_sk(pkt), pkt->skb, nft_hook(pkt)); break; default: diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index af36acc1a644..2880dc7d9a49 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -15,29 +15,11 @@ static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { - const struct { - struct in6_addr dst; - struct in6_addr src; - } __aligned(SIPHASH_ALIGNMENT) combined = { - .dst = *dst, - .src = *src, - }; - u32 hash, id; - - /* Note the following code is not safe, but this is okay. */ - if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) - get_random_bytes(&net->ipv4.ip_id_key, - sizeof(net->ipv4.ip_id_key)); - - hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); - - /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, - * set the hight order instead thus minimizing possible future - * collisions. - */ - id = ip_idents_reserve(hash, 1); - if (unlikely(!id)) - id = 1 << 31; + u32 id; + + do { + id = prandom_u32(); + } while (!id); return id; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 47a0dc46cbdb..28e44782c94d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -343,7 +343,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - if (!(fhdr->frag_off & htons(0xFFF9))) { + if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); __IP6_INC_STATS(net, @@ -351,6 +351,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) + + sizeof(struct ipv6hdr); return 1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a22822bdbf39..7b756a7dc036 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2326,12 +2326,131 @@ out: } } +static u32 rt6_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 rt6_multipath_custom_hash_fl6(const struct net *net, + const struct flowi6 *fl6) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v6addrs.src = fl6->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v6addrs.dst = fl6->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl6->flowi6_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl6->fl6_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl6->fl6_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl6 can be NULL */ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *flkeys) { struct flow_keys hash_keys; - u32 mhash; + u32 mhash = 0; switch (ip6_multipath_hash_policy(net)) { case 0: @@ -2345,6 +2464,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } + mhash = flow_hash_from_keys(&hash_keys); break; case 1: if (skb) { @@ -2376,6 +2496,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } + mhash = flow_hash_from_keys(&hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2412,9 +2533,15 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } + mhash = flow_hash_from_keys(&hash_keys); + break; + case 3: + if (skb) + mhash = rt6_multipath_custom_hash_skb(net, skb); + else + mhash = rt6_multipath_custom_hash_fl6(net, fl6); break; } - mhash = flow_hash_from_keys(&hash_keys); return mhash >> 1; } @@ -3673,11 +3800,11 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (nh) { if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); - goto out; + goto out_free; } if (!nexthop_get(nh)) { NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); - goto out; + goto out_free; } rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); @@ -3714,6 +3841,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, out: fib6_info_release(rt); return ERR_PTR(err); +out_free: + ip_fib_metrics_put(rt->fib6_metrics); + kfree(rt); + return ERR_PTR(err); } int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 4ff38cb08f4b..60bf3b877957 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -87,10 +87,10 @@ struct seg6_end_dt_info { int vrf_ifindex; int vrf_table; - /* tunneled packet proto and family (IPv4 or IPv6) */ - __be16 proto; + /* tunneled packet family (IPv4 or IPv6). + * Protocol and header length are inferred from family. + */ u16 family; - int hdrlen; }; struct pcpu_seg6_local_counters { @@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, info->net = net; info->vrf_ifindex = vrf_ifindex; - switch (family) { - case AF_INET: - info->proto = htons(ETH_P_IP); - info->hdrlen = sizeof(struct iphdr); - break; - case AF_INET6: - info->proto = htons(ETH_P_IPV6); - info->hdrlen = sizeof(struct ipv6hdr); - break; - default: - return -EINVAL; - } - info->family = family; info->mode = DT_VRF_MODE; @@ -622,22 +609,44 @@ error: } static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, - struct seg6_local_lwt *slwt) + struct seg6_local_lwt *slwt, u16 family) { struct seg6_end_dt_info *info = &slwt->dt_info; struct net_device *vrf; + __be16 protocol; + int hdrlen; vrf = end_dt_get_vrf_rcu(skb, info); if (unlikely(!vrf)) goto drop; - skb->protocol = info->proto; + switch (family) { + case AF_INET: + protocol = htons(ETH_P_IP); + hdrlen = sizeof(struct iphdr); + break; + case AF_INET6: + protocol = htons(ETH_P_IPV6); + hdrlen = sizeof(struct ipv6hdr); + break; + case AF_UNSPEC: + fallthrough; + default: + goto drop; + } + + if (unlikely(info->family != AF_UNSPEC && info->family != family)) { + pr_warn_once("seg6local: SRv6 End.DT* family mismatch"); + goto drop; + } + + skb->protocol = protocol; skb_dst_drop(skb); - skb_set_transport_header(skb, info->hdrlen); + skb_set_transport_header(skb, hdrlen); - return end_dt_vrf_rcv(skb, info->family, vrf); + return end_dt_vrf_rcv(skb, family, vrf); drop: kfree_skb(skb); @@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb, goto legacy_mode; /* DT6_VRF_MODE */ - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET6); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -767,6 +776,36 @@ drop: return -EINVAL; } +#ifdef CONFIG_NET_L3_MASTER_DEV +static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack); +} + +static int input_action_end_dt46(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + unsigned int off = 0; + int nexthdr; + + nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL); + if (unlikely(nexthdr < 0)) + goto drop; + + switch (nexthdr) { + case IPPROTO_IPIP: + return input_action_end_dt4(skb, slwt); + case IPPROTO_IPV6: + return input_action_end_dt6(skb, slwt); + } + +drop: + kfree_skb(skb); + return -EINVAL; +} +#endif + /* push an SRH on top of the current one */ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -969,6 +1008,17 @@ static struct seg6_action_desc seg6_action_table[] = { .input = input_action_end_dt6, }, { + .action = SEG6_LOCAL_ACTION_END_DT46, + .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), + .optattrs = SEG6_F_LOCAL_COUNTERS, +#ifdef CONFIG_NET_L3_MASTER_DEV + .input = input_action_end_dt46, + .slwt_ops = { + .build_state = seg6_end_dt46_build, + }, +#endif + }, + { .action = SEG6_LOCAL_ACTION_END_B6, .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), .optattrs = SEG6_F_LOCAL_COUNTERS, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index aa98294a3ad3..df5bea818410 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -271,6 +271,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (ipip6_tunnel_create(dev) < 0) goto failed_free; + if (!parms->name[0]) + strcpy(parms->name, dev->name); + return nt; failed_free: @@ -707,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb) * old iph is no longer valid */ iph = (const struct iphdr *)skb_mac_header(skb); + skb_reset_mac_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -777,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; + skb_reset_mac_header(skb); + return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } @@ -970,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (df) { mtu = dst_mtu(&rt->dst) - t_hlen; - if (mtu < 68) { + if (mtu < IPV4_MIN_MTU) { dev->stats.collisions++; ip_rt_put(rt); goto tx_error; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 27102c3d6e1d..d7cf26f730d7 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -17,13 +17,17 @@ #include <net/addrconf.h> #include <net/inet_frag.h> #include <net/netevent.h> +#include <net/ip_fib.h> #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif static int two = 2; +static int three = 3; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; +static u32 rt6_multipath_hash_fields_all_mask = + FIB_MULTIPATH_HASH_FIELD_ALL_MASK; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -40,6 +44,22 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, return ret; } +static int +proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct net *net; + int ret; + + net = container_of(table->data, struct net, + ipv6.sysctl.multipath_hash_fields); + ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); + + return ret; +} + static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", @@ -149,7 +169,16 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, + }, + { + .procname = "fib_multipath_hash_fields", + .data = &init_net.ipv6.sysctl.multipath_hash_fields, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_rt6_multipath_hash_fields, + .extra1 = SYSCTL_ONE, + .extra2 = &rt6_multipath_hash_fields_all_mask, }, { .procname = "seg6_flowlabel", diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5f47c0b6e3de..4d71464094b3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1538,6 +1538,7 @@ discard: kfree_skb(skb); return 0; csum_err: + trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1663,10 +1664,18 @@ process: goto csum_error; } if (unlikely(sk->sk_state != TCP_LISTEN)) { - inet_csk_reqsk_queue_drop_and_put(sk, req); - goto lookup; + nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); + if (!nsk) { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } + sk = nsk; + /* reuseport_migrate_sock() has already held one sk_refcnt + * before returning. + */ + } else { + sock_hold(sk); } - sock_hold(sk); refcounted = true; nsk = NULL; if (!tcp_filter(sk, skb)) { @@ -1754,6 +1763,7 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: + trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 199b080d418a..3fcd86f4dfdc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); + + /* protects from races with udp_abort() */ + sock_set_flag(sk, SOCK_DEAD); udp_v6_flush_pending_frames(sk); release_sock(sk); |