diff options
author | David S. Miller <davem@davemloft.net> | 2022-05-16 10:31:06 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-05-16 10:31:06 +0100 |
commit | e97e68b56e78303581a03b26e95f6c0c03ecbbe2 (patch) | |
tree | 61574829a8a9c493ce90134c11bb6b87daa86ed7 /net | |
parent | 7fa2e481ff2fee20e0338d98489eb9f513ada45f (diff) | |
parent | eda090c31fe923ab9463b884469744ec903ab0cc (diff) |
Merge branch 'sk_bound_dev_if-annotations'
Eric Dumazet says:
====================
net: add annotations for sk->sk_bound_dev_if
While writes on sk->sk_bound_dev_if are protected by socket lock,
we have many lockless reads all over the places.
This is based on syzbot report found in the first patch changelog.
v2: inline ipv6 function only defined if IS_ENABLED(CONFIG_IPV6) (kernel bots)
Change the INET6_MATCH() to inet6_match(), this is no longer a macro.
Change INET_MATCH() to inet_match() (Olivier Hartkopp & Jakub Kicinski)
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/sock.c | 11 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 2 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 4 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 12 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 10 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 6 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 6 | ||||
-rw-r--r-- | net/ipv6/udp.c | 13 | ||||
-rw-r--r-- | net/l2tp/l2tp_ip.c | 4 | ||||
-rw-r--r-- | net/l2tp/l2tp_ip6.c | 8 | ||||
-rw-r--r-- | net/sched/em_meta.c | 7 | ||||
-rw-r--r-- | net/sctp/input.c | 4 |
13 files changed, 53 insertions, 36 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 24a46a1e4f28..2ff40dd0a7a6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -635,7 +635,9 @@ static int sock_bindtoindex_locked(struct sock *sk, int ifindex) if (ifindex < 0) goto out; - sk->sk_bound_dev_if = ifindex; + /* Paired with all READ_ONCE() done locklessly. */ + WRITE_ONCE(sk->sk_bound_dev_if, ifindex); + if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); sk_dst_reset(sk); @@ -713,10 +715,11 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); struct net *net = sock_net(sk); char devname[IFNAMSIZ]; - if (sk->sk_bound_dev_if == 0) { + if (bound_dev_if == 0) { len = 0; goto zero; } @@ -725,7 +728,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, if (len < IFNAMSIZ) goto out; - ret = netdev_get_name(net, devname, sk->sk_bound_dev_if); + ret = netdev_get_name(net, devname, bound_dev_if); if (ret) goto out; @@ -1861,7 +1864,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_BINDTOIFINDEX: - v.val = sk->sk_bound_dev_if; + v.val = READ_ONCE(sk->sk_bound_dev_if); break; case SO_NETNS_COOKIE: diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 82696ab86f74..307424872160 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -628,7 +628,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); ireq->ir_mark = inet_request_mark(sk, skb); ireq->ireq_family = AF_INET; - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4d95b6400915..d717ef0def64 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -374,10 +374,10 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) refcount_inc(&skb->users); ireq->pktopts = skb; } - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && + if (!ireq->ir_iif && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1e5b53c2bb26..53f5f956d948 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -155,10 +155,14 @@ static int inet_csk_bind_conflict(const struct sock *sk, */ sk_for_each_bound(sk2, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + int bound_dev_if2; + + if (sk == sk2) + continue; + bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); + if ((!sk->sk_bound_dev_if || + !bound_dev_if2 || + sk->sk_bound_dev_if == bound_dev_if2)) { if (reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { if ((!relax || diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 5257a7575649..87354e20009a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -373,10 +373,10 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (likely(INET_MATCH(net, sk, acookie, ports, dif, sdif))) { + if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) { if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; - if (unlikely(!INET_MATCH(net, sk, acookie, + if (unlikely(!inet_match(net, sk, acookie, ports, dif, sdif))) { sock_gen_put(sk); goto begin; @@ -426,7 +426,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (likely(INET_MATCH(net, sk2, acookie, ports, dif, sdif))) { + if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) @@ -492,14 +492,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk, if (esk->sk_hash != sk->sk_hash) continue; if (sk->sk_family == AF_INET) { - if (unlikely(INET_MATCH(net, esk, acookie, + if (unlikely(inet_match(net, esk, acookie, ports, dif, sdif))) { return true; } } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - if (unlikely(INET6_MATCH(esk, net, + if (unlikely(inet6_match(net, esk, &sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr, ports, dif, sdif))) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 53342ce17172..aa9f2ec3dc46 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2563,7 +2563,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, struct sock *sk; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - if (INET_MATCH(net, sk, acookie, ports, dif, sdif)) + if (inet_match(net, sk, acookie, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 39b2327edc4e..df665d4e8f0f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -218,11 +218,11 @@ ipv4_connected: err = -EINVAL; goto out; } - sk->sk_bound_dev_if = usin->sin6_scope_id; + WRITE_ONCE(sk->sk_bound_dev_if, usin->sin6_scope_id); } if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST)) - sk->sk_bound_dev_if = np->mcast_oif; + WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif); /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { @@ -798,7 +798,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, if (src_idx) { if (fl6->flowi6_oif && src_idx != fl6->flowi6_oif && - (sk->sk_bound_dev_if != fl6->flowi6_oif || + (READ_ONCE(sk->sk_bound_dev_if) != fl6->flowi6_oif || !sk_dev_equal_l3scope(sk, src_idx))) return -EINVAL; fl6->flowi6_oif = src_idx; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index a758f2ab7b51..7d53d62783b1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -71,12 +71,12 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif)) + if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) { + if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } @@ -268,7 +268,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, + if (likely(inet6_match(net, sk2, saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3fc97d4621ac..55afd7f39c04 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -105,7 +105,7 @@ static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, unsigned short hnum, int dif, int sdif) { - int score; + int bound_dev_if, score; struct inet_sock *inet; bool dev_match; @@ -132,10 +132,11 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif); if (!dev_match) return -1; - if (sk->sk_bound_dev_if) + if (bound_dev_if) score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) @@ -789,7 +790,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || - !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) || + !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) || (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; @@ -1043,7 +1044,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && - INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif)) + inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; @@ -1433,7 +1434,7 @@ do_udp_sendmsg: } if (!fl6->flowi6_oif) - fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if); if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 6af09e188e52..4db5a554bdbd 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -50,11 +50,13 @@ static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr, sk_for_each_bound(sk, &l2tp_ip_bind_table) { const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); const struct inet_sock *inet = inet_sk(sk); + int bound_dev_if; if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif) + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && dif && bound_dev_if != dif) continue; if (inet->inet_rcv_saddr && laddr && diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 217c7192691e..c6ff8bf9b55f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -62,11 +62,13 @@ static struct sock *__l2tp_ip6_bind_lookup(const struct net *net, const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); const struct in6_addr *sk_raddr = &sk->sk_v6_daddr; const struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); + int bound_dev_if; if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif) + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && dif && bound_dev_if != dif) continue; if (sk_laddr && !ipv6_addr_any(sk_laddr) && @@ -445,7 +447,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_conn_id = lsk->conn_id; } if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) - lsa->l2tp_scope_id = sk->sk_bound_dev_if; + lsa->l2tp_scope_id = READ_ONCE(sk->sk_bound_dev_if); return sizeof(*lsa); } @@ -560,7 +562,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } if (fl6.flowi6_oif == 0) - fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = READ_ONCE(sk->sk_bound_dev_if); if (msg->msg_controllen) { opt = &opt_space; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 0a04468b7314..49bae3d5006b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -311,12 +311,15 @@ META_COLLECTOR(int_sk_bound_if) META_COLLECTOR(var_sk_bound_if) { + int bound_dev_if; + if (skip_nonlocal(skb)) { *err = -1; return; } - if (skb->sk->sk_bound_dev_if == 0) { + bound_dev_if = READ_ONCE(skb->sk->sk_bound_dev_if); + if (bound_dev_if == 0) { dst->value = (unsigned long) "any"; dst->len = 3; } else { @@ -324,7 +327,7 @@ META_COLLECTOR(var_sk_bound_if) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(skb->sk), - skb->sk->sk_bound_dev_if); + bound_dev_if); *err = var_dev(dev, dst); rcu_read_unlock(); } diff --git a/net/sctp/input.c b/net/sctp/input.c index 90e12bafdd48..4f43afa8678f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -92,6 +92,7 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_chunk *chunk; union sctp_addr src; union sctp_addr dest; + int bound_dev_if; int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); @@ -169,7 +170,8 @@ int sctp_rcv(struct sk_buff *skb) * If a frame arrives on an interface and the receiving socket is * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ - if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) { if (transport) { sctp_transport_put(transport); asoc = NULL; |