author     Linus Torvalds <torvalds@linux-foundation.org>   2021-06-30 15:51:09 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2021-06-30 15:51:09 -0700
commit     dbe69e43372212527abf48609aba7fc39a6daa27 (patch)
tree       96cfafdf70f5325ceeac1054daf7deca339c9730 /net/xfrm
parent     a6eaf3850cb171c328a8b0db6d3c79286a1eba9d (diff)
parent     b6df00789e2831fff7a2c65aa7164b2a4dcbe599 (diff)
Merge tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- BPF:
- add syscall program type and libbpf support for generating
instructions and bindings for in-kernel BPF loaders (BPF loaders
for BPF), this is a stepping stone for signed BPF programs
- infrastructure to migrate TCP child sockets from one listener to
another in the same reuseport group/map to improve flexibility
of service hand-off/restart
- add broadcast support to XDP redirect
- allow bypass of the lockless qdisc to improve performance (for
pktgen: +23% with one thread, +44% with 2 threads)
- add a simpler version of "DO_ONCE()" that does not require jump
labels, intended for slow-path usage (a sketch follows this message)
- virtio/vsock: introduce SOCK_SEQPACKET support
- add getsockopt to retrieve the netns cookie (a usage sketch follows this message)
- ip: treat the lowest address of an IPv4 subnet as an ordinary
unicast address, allowing precious IPv4 addresses to be reclaimed
- ipv6: use prandom_u32() for ID generation
- ip: add support for more flexible field selection for hashing
across multi-path routes (w/ offload to mlxsw)
- icmp: add support for extended RFC 8335 PROBE (ping)
- seg6: add support for SRv6 End.DT46 behavior
- mptcp:
- DSS checksum support (RFC 8684) to detect middlebox meddling
- support Connection-time 'C' flag
- time stamping support
- sctp: Packetization Layer Path MTU Discovery (RFC 8899)
- xfrm: speed up state addition with seq set
- WiFi:
- hidden AP discovery on 6 GHz and other HE 6 GHz improvements
- aggregation handling improvements for some drivers
- minstrel improvements for no-ack frames
- deferred rate control for TXQs to improve reaction times
- switch from round robin to virtual time-based airtime scheduler
- add trace points:
- tcp checksum errors
- openvswitch - action execution, upcalls
- socket errors via sk_error_report
Device APIs:
- devlink: add rate API for hierarchical control of max egress rate
of virtual devices (VFs, SFs etc.)
- don't require RCU read lock to be held around BPF hooks in NAPI
context
- page_pool: generic buffer recycling
New hardware/drivers:
- mobile:
- iosm: PCIe Driver for Intel M.2 Modem
- support for Qualcomm MSM8998 (ipa)
- WiFi: Qualcomm QCN9074 and WCN6855 PCI devices
- sparx5: Microchip SparX-5 family of Enterprise Ethernet switches
- Mellanox BlueField Gigabit Ethernet (control NIC of the DPU)
- NXP SJA1110 Automotive Ethernet 10-port switch
- Qualcomm QCA8327 switch support (qca8k)
- Mikrotik 10/25G NIC (atl1c)
Driver changes:
- ACPI support for some MDIO, MAC and PHY devices from Marvell and
NXP (our first foray into MAC/PHY description via ACPI)
- HW timestamping (PTP) support: bnxt_en, ice, sja1105, hns3, tja11xx
- Mellanox/Nvidia NIC (mlx5)
- NIC VF offload of L2 bridging
- support IRQ distribution to Sub-functions
- Marvell (prestera):
- add flower and match all
- devlink trap
- link aggregation
- Netronome (nfp): connection tracking offload
- Intel 1GE (igc): add AF_XDP support
- Marvell DPU (octeontx2): ingress ratelimit offload
- Google vNIC (gve): new ring/descriptor format support
- Qualcomm mobile (rmnet & ipa): inline checksum offload support
- MediaTek WiFi (mt76)
- mt7915 MSI support
- mt7915 Tx status reporting
- mt7915 thermal sensors support
- mt7921 decapsulation offload
- mt7921 enable runtime pm and deep sleep
- Realtek WiFi (rtw88)
- beacon filter support
- Tx antenna path diversity support
- firmware crash information via devcoredump
- Qualcomm WiFi (wcn36xx)
- Wake-on-WLAN support with magic packets and GTK rekeying
- Micrel PHY (ksz886x/ksz8081): add cable test support"
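For the DO_ONCE() bullet above: the idea is a one-shot call guarded by a plain static flag instead of jump-label patching, which is acceptable on slow paths. The in-kernel helpers are DO_ONCE_LITE()/DO_ONCE_LITE_IF() (not part of the net/xfrm diff shown here); the macro below is only a simplified userspace stand-in and assumes GCC/Clang statement expressions.

```c
/* Userspace sketch of a "DO_ONCE() without jump labels": a static flag
 * guards a one-time call on a slow path. Simplified stand-in, not the
 * kernel macro.
 */
#include <stdbool.h>
#include <stdio.h>

#define DO_ONCE_SKETCH(func, ...)			\
	({						\
		static bool __already_done;		\
		bool __ret = !__already_done;		\
							\
		if (__ret) {				\
			__already_done = true;		\
			func(__VA_ARGS__);		\
		}					\
		__ret;					\
	})

static void warn_slow_path(const char *what)
{
	fprintf(stderr, "slow path hit: %s\n", what);
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		DO_ONCE_SKETCH(warn_slow_path, "demo");	/* prints only once */
	return 0;
}
```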
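For the netns cookie bullet above, a minimal user-space sketch of reading the cookie with getsockopt(). SO_NETNS_COOKIE is assumed to come from this release's uapi headers; the fallback value 71 matches the asm-generic socket.h definition on most architectures, but treat it as an assumption if your headers differ.

```c
/* Minimal sketch: read the network-namespace cookie of a socket.
 * Older kernels without SO_NETNS_COOKIE return ENOPROTOOPT.
 */
#include <stdio.h>
#include <stdint.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SO_NETNS_COOKIE
#define SO_NETNS_COOKIE 71	/* assumed asm-generic value; check your uapi headers */
#endif

int main(void)
{
	uint64_t cookie = 0;
	socklen_t len = sizeof(cookie);
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	if (getsockopt(fd, SOL_SOCKET, SO_NETNS_COOKIE, &cookie, &len) < 0)
		perror("getsockopt(SO_NETNS_COOKIE)");
	else
		printf("netns cookie: %llu\n", (unsigned long long)cookie);

	close(fd);
	return 0;
}
```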
* tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2168 commits)
tcp: change ICSK_CA_PRIV_SIZE definition
tcp_yeah: check struct yeah size at compile time
gve: DQO: Fix off by one in gve_rx_dqo()
stmmac: intel: set PCI_D3hot in suspend
stmmac: intel: Enable PHY WOL option in EHL
net: stmmac: option to enable PHY WOL with PMT enabled
net: say "local" instead of "static" addresses in ndo_dflt_fdb_{add,del}
net: use netdev_info in ndo_dflt_fdb_{add,del}
ptp: Set lookup cookie when creating a PTP PPS source.
net: sock: add trace for socket errors
net: sock: introduce sk_error_report
net: dsa: replay the local bridge FDB entries pointing to the bridge dev too
net: dsa: ensure during dsa_fdb_offload_notify that dev_hold and dev_put are on the same dev
net: dsa: include fdb entries pointing to bridge in the host fdb list
net: dsa: include bridge addresses which are local in the host fdb list
net: dsa: sync static FDB entries on foreign interfaces to hardware
net: dsa: install the host MDB and FDB entries in the master's RX filter
net: dsa: reference count the FDB addresses at the cross-chip notifier level
net: dsa: introduce a separate cross-chip notifier type for host FDBs
net: dsa: reference count the MDB entries at the cross-chip notifier level
...
Diffstat (limited to 'net/xfrm')
-rw-r--r--  net/xfrm/xfrm_device.c |   1
-rw-r--r--  net/xfrm/xfrm_hash.h   |   7
-rw-r--r--  net/xfrm/xfrm_input.c  |   6
-rw-r--r--  net/xfrm/xfrm_output.c | 131
-rw-r--r--  net/xfrm/xfrm_policy.c |  23
-rw-r--r--  net/xfrm/xfrm_replay.c | 171
-rw-r--r--  net/xfrm/xfrm_state.c  |  81
-rw-r--r--  net/xfrm/xfrm_user.c   |  28
8 files changed, 329 insertions, 119 deletions
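The largest xfrm_state.c chunk in the diff below backs the "speed up state addition with seq set" item above: states with km.seq set are additionally linked into a new net->xfrm.state_byseq hash table, so __xfrm_find_acq_byseq() walks a single bucket instead of scanning every bydst chain. A standalone sketch of that lookup shape follows; the names and types (acq_state, byseq, seq_hash) are simplified placeholders, not the kernel's.

```c
/* Standalone sketch (not kernel code) of the by-sequence lookup added
 * in net/xfrm/xfrm_state.c below.
 */
#include <stdint.h>
#include <stddef.h>

#define SEQ_HASH_BUCKETS 256		/* the kernel derives this from state_hmask */

struct acq_state {
	uint32_t seq;
	struct acq_state *next;		/* hlist_node in the kernel; singly linked here */
};

static struct acq_state *byseq[SEQ_HASH_BUCKETS];

/* same mixing as __xfrm_seq_hash() in the diff, masked to a bucket index */
static unsigned int seq_hash(uint32_t seq)
{
	uint32_t h = seq;

	return (h ^ (h >> 10) ^ (h >> 20)) & (SEQ_HASH_BUCKETS - 1);
}

static void insert_byseq(struct acq_state *x)
{
	unsigned int h = seq_hash(x->seq);

	x->next = byseq[h];
	byseq[h] = x;
}

/* one-bucket walk, replacing the old loop over every bydst bucket */
static struct acq_state *find_by_seq(uint32_t seq)
{
	struct acq_state *x;

	for (x = byseq[seq_hash(seq)]; x; x = x->next)
		if (x->seq == seq)
			return x;

	return NULL;
}

int main(void)
{
	static struct acq_state a = { .seq = 0x1234 };

	insert_byseq(&a);
	return find_by_seq(0x1234) == &a ? 0 : 1;
}
```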
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6d6917b68856..e843b0d9e2a6 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -268,6 +268,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->num_exthdrs = 0; xso->flags = 0; xso->dev = NULL; + xso->real_dev = NULL; dev_put(dev); if (err != -EOPNOTSUPP) diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index ce66323102f9..d12bb906c9c9 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -131,6 +131,13 @@ __xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } +static inline unsigned int +__xfrm_seq_hash(u32 seq, unsigned int hmask) +{ + unsigned int h = seq; + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; +} + static inline unsigned int __idx_hash(u32 index, unsigned int hmask) { return (index ^ (index >> 8)) & hmask; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 1158cd0311d7..3df0861d4390 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -612,7 +612,7 @@ lock: goto drop_unlock; } - if (x->repl->check(x, skb, seq)) { + if (xfrm_replay_check(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } @@ -660,12 +660,12 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (x->repl->recheck(x, skb, seq)) { + if (xfrm_replay_recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } - x->repl->advance(x, seq); + xfrm_replay_advance(x, seq); x->curlft.bytes += skb->len; x->curlft.packets++; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index e4cb0ff4dcf4..229544bc70c2 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -77,6 +77,83 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#if IS_ENABLED(CONFIG_IPV6_MIP6) +static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type) +{ + const unsigned char *nh = skb_network_header(skb); + unsigned int offset = sizeof(struct ipv6hdr); + unsigned int packet_len; + int found_rhdr = 0; + + packet_len = skb_tail_pointer(skb) - nh; + *nexthdr = &ipv6_hdr(skb)->nexthdr; + + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) { + struct ipv6_rt_hdr *rt; + + rt = (struct ipv6_rt_hdr *)(nh + offset); + if (rt->type != 0) + return offset; + } + found_rhdr = 1; + break; + case NEXTHDR_DEST: + /* HAO MUST NOT appear more than once. + * XXX: It is better to try to find by the end of + * XXX: packet if HAO exists. 
+ */ + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { + net_dbg_ratelimited("mip6: hao exists already, override\n"); + return offset; + } + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) + return -EINVAL; + + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); + offset += ipv6_optlen(exthdr); + if (offset > IPV6_MAXPLEN) + return -EINVAL; + *nexthdr = &exthdr->nexthdr; + } + + return -EINVAL; +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr) +{ + switch (x->type->proto) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + return mip6_rthdr_offset(skb, prevhdr, x->type->proto); +#endif + default: + break; + } + + return ip6_find_1stfragopt(skb, prevhdr); +} +#endif + /* Add encapsulation header. * * The IP header and mutable extension headers will be moved forward to make @@ -92,7 +169,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) iph = ipv6_hdr(skb); skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) return hdr_len; skb_set_mac_header(skb, @@ -122,7 +199,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) iph = ipv6_hdr(skb); - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) return hdr_len; skb_set_mac_header(skb, @@ -448,7 +525,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error; } - err = x->repl->overflow(x, skb); + err = xfrm_replay_overflow(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); goto error; @@ -565,6 +642,42 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb return 0; } +/* For partial checksum offload, the outer header checksum is calculated + * by software and the inner header checksum is calculated by hardware. + * This requires hardware to know the inner packet type to calculate + * the inner header checksum. Save inner ip protocol here to avoid + * traversing the packet in the vendor's xmit code. + * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise, + * get the ip protocol from the IP header. 
+ */ +static void xfrm_get_inner_ipproto(struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + const struct ethhdr *eth; + + if (!xo) + return; + + if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) { + xo->inner_ipproto = skb->inner_ipproto; + return; + } + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) + return; + + eth = (struct ethhdr *)skb_inner_mac_header(skb); + + switch (ntohs(eth->h_proto)) { + case ETH_P_IPV6: + xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr; + break; + case ETH_P_IP: + xo->inner_ipproto = inner_ip_hdr(skb)->protocol; + break; + } +} + int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -594,12 +707,15 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return -ENOMEM; } - skb->encapsulation = 1; sp->olen++; sp->xvec[sp->len++] = x; xfrm_state_hold(x); + if (skb->encapsulation) + xfrm_get_inner_ipproto(skb); + skb->encapsulation = 1; + if (skb_is_gso(skb)) { if (skb->inner_protocol) return xfrm_output_gso(net, sk, skb); @@ -711,15 +827,8 @@ out: static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) - unsigned int ptr = 0; int err; - if (x->outer_mode.encap == XFRM_MODE_BEET && - ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL) >= 0) { - net_warn_ratelimited("BEET mode doesn't support inner IPv6 fragments\n"); - return -EAFNOSUPPORT; - } - err = xfrm6_tunnel_check_size(skb); if (err) return err; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e70cf1d2c0e0..827d84255021 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2091,12 +2091,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - rcu_read_lock(); retry: - do { - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); - chain = policy_hash_direct(net, daddr, saddr, family, dir); - } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + rcu_read_lock(); + + chain = policy_hash_direct(net, daddr, saddr, family, dir); + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { + rcu_read_unlock(); + goto retry; + } ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2127,11 +2130,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { + rcu_read_unlock(); goto retry; + } - if (ret && !xfrm_pol_hold_rcu(ret)) + if (ret && !xfrm_pol_hold_rcu(ret)) { + rcu_read_unlock(); goto retry; + } fail: rcu_read_unlock(); @@ -3245,7 +3252,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, /* * 0 or more than 0 is returned when validation is succeeded (either bypass - * because of optional transport mode, or next index of the mathced secpath + * because of optional transport mode, or next index of the matched secpath * state with the template. * -1 is returned when no matching template is found. * Otherwise "-2 - errored_index" is returned. 
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index c6a4338a0d08..9277d81b344c 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -34,8 +34,11 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) return seq_hi; } EXPORT_SYMBOL(xfrm_replay_seqhi); -; -static void xfrm_replay_notify(struct xfrm_state *x, int event) + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event); +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event); + +void xfrm_replay_notify(struct xfrm_state *x, int event) { struct km_event c; /* we send notify messages in case @@ -48,6 +51,17 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event) * The state structure must be locked! */ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + xfrm_replay_notify_bmp(x, event); + return; + case XFRM_REPLAY_MODE_ESN: + xfrm_replay_notify_esn(x, event); + return; + } + switch (event) { case XFRM_REPLAY_UPDATE: if (!x->replay_maxdiff || @@ -81,7 +95,7 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } -static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +static int __xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct net *net = xs_net(x); @@ -98,14 +112,14 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) return err; } if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } -static int xfrm_replay_check(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq) +static int xfrm_replay_check_legacy(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) { u32 diff; u32 seq = ntohl(net_seq); @@ -136,14 +150,26 @@ err: return -EINVAL; } -static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq); +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq); + +void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) { - u32 diff; - u32 seq = ntohl(net_seq); + u32 diff, seq; + + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_advance_bmp(x, net_seq); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_advance_esn(x, net_seq); + } if (!x->props.replay_window) return; + seq = ntohl(net_seq); if (seq > x->replay.seq) { diff = seq - x->replay.seq; if (diff < x->props.replay_window) @@ -157,7 +183,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) @@ -178,7 +204,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) return err; } if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -273,7 +299,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) @@ -416,7 +442,7 @@ static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) } } if 
(xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -481,6 +507,21 @@ err: return -EINVAL; } +int xfrm_replay_check(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_check_bmp(x, skb, net_seq); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_check_esn(x, skb, net_seq); + } + + return xfrm_replay_check_legacy(x, skb, net_seq); +} + static int xfrm_replay_recheck_esn(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { @@ -493,6 +534,22 @@ static int xfrm_replay_recheck_esn(struct xfrm_state *x, return xfrm_replay_check_esn(x, skb, net_seq); } +int xfrm_replay_recheck(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + /* no special recheck treatment */ + return xfrm_replay_check_bmp(x, skb, net_seq); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_recheck_esn(x, skb, net_seq); + } + + return xfrm_replay_check_legacy(x, skb, net_seq); +} + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; @@ -548,7 +605,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } #ifdef CONFIG_XFRM_OFFLOAD @@ -560,7 +617,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk __u32 oseq = x->replay.oseq; if (!xo) - return xfrm_replay_overflow(x, skb); + return __xfrm_replay_overflow(x, skb); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { if (!skb_is_gso(skb)) { @@ -585,7 +642,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk x->replay.oseq = oseq; if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -625,7 +682,7 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff } if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -674,59 +731,39 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff replay_esn->oseq = oseq; if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } -static const struct xfrm_replay xfrm_replay_legacy = { - .advance = xfrm_replay_advance, - .check = xfrm_replay_check, - .recheck = xfrm_replay_check, - .notify = xfrm_replay_notify, - .overflow = xfrm_replay_overflow_offload, -}; - -static const struct xfrm_replay xfrm_replay_bmp = { - .advance = xfrm_replay_advance_bmp, - .check = xfrm_replay_check_bmp, - .recheck = xfrm_replay_check_bmp, - .notify = xfrm_replay_notify_bmp, - .overflow = xfrm_replay_overflow_offload_bmp, -}; - -static const struct xfrm_replay xfrm_replay_esn = { - .advance = xfrm_replay_advance_esn, - .check = xfrm_replay_check_esn, - .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_esn, - .overflow = xfrm_replay_overflow_offload_esn, -}; +int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_overflow_offload_bmp(x, skb); + 
case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_overflow_offload_esn(x, skb); + } + + return xfrm_replay_overflow_offload(x, skb); +} #else -static const struct xfrm_replay xfrm_replay_legacy = { - .advance = xfrm_replay_advance, - .check = xfrm_replay_check, - .recheck = xfrm_replay_check, - .notify = xfrm_replay_notify, - .overflow = xfrm_replay_overflow, -}; - -static const struct xfrm_replay xfrm_replay_bmp = { - .advance = xfrm_replay_advance_bmp, - .check = xfrm_replay_check_bmp, - .recheck = xfrm_replay_check_bmp, - .notify = xfrm_replay_notify_bmp, - .overflow = xfrm_replay_overflow_bmp, -}; - -static const struct xfrm_replay xfrm_replay_esn = { - .advance = xfrm_replay_advance_esn, - .check = xfrm_replay_check_esn, - .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_esn, - .overflow = xfrm_replay_overflow_esn, -}; +int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_overflow_bmp(x, skb); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_overflow_esn(x, skb); + } + + return __xfrm_replay_overflow(x, skb); +} #endif int xfrm_init_replay(struct xfrm_state *x) @@ -741,12 +778,12 @@ int xfrm_init_replay(struct xfrm_state *x) if (x->props.flags & XFRM_STATE_ESN) { if (replay_esn->replay_window == 0) return -EINVAL; - x->repl = &xfrm_replay_esn; + x->repl_mode = XFRM_REPLAY_MODE_ESN; } else { - x->repl = &xfrm_replay_bmp; + x->repl_mode = XFRM_REPLAY_MODE_BMP; } } else { - x->repl = &xfrm_replay_legacy; + x->repl_mode = XFRM_REPLAY_MODE_LEGACY; } return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4496f7efa220..a2f4001221d1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -78,10 +78,16 @@ xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } +static unsigned int xfrm_seq_hash(struct net *net, u32 seq) +{ + return __xfrm_seq_hash(seq, net->xfrm.state_hmask); +} + static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, struct hlist_head *nsrctable, struct hlist_head *nspitable, + struct hlist_head *nseqtable, unsigned int nhashmask) { struct hlist_node *tmp; @@ -106,6 +112,11 @@ static void xfrm_hash_transfer(struct hlist_head *list, nhashmask); hlist_add_head_rcu(&x->byspi, nspitable + h); } + + if (x->km.seq) { + h = __xfrm_seq_hash(x->km.seq, nhashmask); + hlist_add_head_rcu(&x->byseq, nseqtable + h); + } } } @@ -117,7 +128,7 @@ static unsigned long xfrm_hash_new_size(unsigned int state_hmask) static void xfrm_hash_resize(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.state_hash_work); - struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq; unsigned long nsize, osize; unsigned int nhashmask, ohashmask; int i; @@ -137,6 +148,13 @@ static void xfrm_hash_resize(struct work_struct *work) xfrm_hash_free(nsrc, nsize); return; } + nseq = xfrm_hash_alloc(nsize); + if (!nseq) { + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); + xfrm_hash_free(nspi, nsize); + return; + } spin_lock_bh(&net->xfrm.xfrm_state_lock); write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); @@ -144,15 +162,17 @@ static void xfrm_hash_resize(struct work_struct *work) nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); for (i = 
net->xfrm.state_hmask; i >= 0; i--) - xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask); + xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask); osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net); ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net); + oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net); ohashmask = net->xfrm.state_hmask; rcu_assign_pointer(net->xfrm.state_bydst, ndst); rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); rcu_assign_pointer(net->xfrm.state_byspi, nspi); + rcu_assign_pointer(net->xfrm.state_byseq, nseq); net->xfrm.state_hmask = nhashmask; write_seqcount_end(&net->xfrm.xfrm_state_hash_generation); @@ -165,6 +185,7 @@ static void xfrm_hash_resize(struct work_struct *work) xfrm_hash_free(odst, osize); xfrm_hash_free(osrc, osize); xfrm_hash_free(ospi, osize); + xfrm_hash_free(oseq, osize); } static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); @@ -621,6 +642,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); + INIT_HLIST_NODE(&x->byseq); hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); x->mtimer.function = xfrm_timer_handler; timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); @@ -664,6 +686,8 @@ int __xfrm_state_delete(struct xfrm_state *x) list_del(&x->km.all); hlist_del_rcu(&x->bydst); hlist_del_rcu(&x->bysrc); + if (x->km.seq) + hlist_del_rcu(&x->byseq); if (x->id.spi) hlist_del_rcu(&x->byspi); net->xfrm.state_num--; @@ -1148,6 +1172,10 @@ found: h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } + if (x->km.seq) { + h = xfrm_seq_hash(net, x->km.seq); + hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h); + } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), @@ -1263,6 +1291,12 @@ static void __xfrm_state_insert(struct xfrm_state *x) hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } + if (x->km.seq) { + h = xfrm_seq_hash(net, x->km.seq); + + hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h); + } + hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); @@ -1932,20 +1966,18 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { - int i; - - for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct xfrm_state *x; + unsigned int h = xfrm_seq_hash(net, seq); + struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { - if (x->km.seq == seq && - (mark & x->mark.m) == x->mark.v && - x->km.state == XFRM_STATE_ACQ) { - xfrm_state_hold(x); - return x; - } + hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { + if (x->km.seq == seq && + (mark & x->mark.m) == x->mark.v && + x->km.state == XFRM_STATE_ACQ) { + xfrm_state_hold(x); + return x; } } + return NULL; } @@ -2145,7 +2177,7 @@ static void xfrm_replay_timer_handler(struct timer_list *t) if (x->km.state == XFRM_STATE_VALID) { if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_TIMEOUT); + xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); else x->xflags |= XFRM_TIME_DEFER; } @@ -2518,7 +2550,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 
__xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2549,7 +2581,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(xfrm_state_mtu); +EXPORT_SYMBOL_GPL(__xfrm_state_mtu); + +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) +{ + mtu = __xfrm_state_mtu(x, mtu); + + if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) + return IPV6_MIN_MTU; + + return mtu; +} int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { @@ -2660,6 +2702,9 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_byspi = xfrm_hash_alloc(sz); if (!net->xfrm.state_byspi) goto out_byspi; + net->xfrm.state_byseq = xfrm_hash_alloc(sz); + if (!net->xfrm.state_byseq) + goto out_byseq; net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); net->xfrm.state_num = 0; @@ -2669,6 +2714,8 @@ int __net_init xfrm_state_init(struct net *net) &net->xfrm.xfrm_state_lock); return 0; +out_byseq: + xfrm_hash_free(net->xfrm.state_byspi, sz); out_byspi: xfrm_hash_free(net->xfrm.state_bysrc, sz); out_bysrc: @@ -2688,6 +2735,8 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_byseq)); + xfrm_hash_free(net->xfrm.state_byseq, sz); WARN_ON(!hlist_empty(net->xfrm.state_byspi)); xfrm_hash_free(net->xfrm.state_byspi, sz); WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f0aecee4d539..b47d613409b7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -580,6 +580,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, copy_from_user_state(x, p); + if (attrs[XFRMA_ENCAP]) { + x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*x->encap), GFP_KERNEL); + if (x->encap == NULL) + goto error; + } + + if (attrs[XFRMA_COADDR]) { + x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), + sizeof(*x->coaddr), GFP_KERNEL); + if (x->coaddr == NULL) + goto error; + } + if (attrs[XFRMA_SA_EXTRA_FLAGS]) x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); @@ -600,23 +614,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, attrs[XFRMA_ALG_COMP]))) goto error; - if (attrs[XFRMA_ENCAP]) { - x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), - sizeof(*x->encap), GFP_KERNEL); - if (x->encap == NULL) - goto error; - } - if (attrs[XFRMA_TFCPAD]) x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); - if (attrs[XFRMA_COADDR]) { - x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), - sizeof(*x->coaddr), GFP_KERNEL); - if (x->coaddr == NULL) - goto error; - } - xfrm_mark_get(attrs, &x->mark); xfrm_smark_init(attrs, &x->props.smark); |
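The xfrm_replay.c portion of the diff replaces the per-state struct xfrm_replay function-pointer table with an x->repl_mode enum and switch-based dispatch (plain direct calls, with the legacy mode as the fall-through default). A condensed standalone sketch of that shape follows; the enum, struct and stub bodies below are placeholders, not the kernel definitions.

```c
/* Condensed sketch of the dispatch pattern above: an enum mode plus a
 * switch replaces a struct of function pointers.
 */
#include <stdio.h>

enum replay_mode {
	REPLAY_MODE_LEGACY,
	REPLAY_MODE_BMP,
	REPLAY_MODE_ESN,
};

struct state {
	enum replay_mode repl_mode;
};

/* placeholder implementations standing in for the per-mode helpers */
static int check_legacy(struct state *x, unsigned int seq) { return 0; }
static int check_bmp(struct state *x, unsigned int seq)    { return 0; }
static int check_esn(struct state *x, unsigned int seq)    { return 0; }

/* mirrors the shape of xfrm_replay_check(): direct calls selected by mode */
static int replay_check(struct state *x, unsigned int seq)
{
	switch (x->repl_mode) {
	case REPLAY_MODE_LEGACY:
		break;
	case REPLAY_MODE_BMP:
		return check_bmp(x, seq);
	case REPLAY_MODE_ESN:
		return check_esn(x, seq);
	}

	return check_legacy(x, seq);
}

int main(void)
{
	struct state x = { .repl_mode = REPLAY_MODE_ESN };

	printf("%d\n", replay_check(&x, 1));
	return 0;
}
```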