summary | refs | log | tree | commit | diff
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-11 14:27:06 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-11 14:27:06 -0800
commit: 70e71ca0af244f48a5dcf56dc435243792e3a495 (patch)
tree: f7d9c4c4d9a857a00043e9bf6aa2d6f533a34778 /net/ipv4/udp.c
parent: bae41e45b7400496b9bf0c70c6004419d9987819 (diff)
parent: 00c83b01d58068dfeb2e1351cca6fccf2a83fa8f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 1) New offloading infrastructure and example 'rocker' driver for offloading of switching and routing to hardware. This work was done by a large group of dedicated individuals, not limited to: Scott Feldman, Jiri Pirko, Thomas Graf, John Fastabend, Jamal Hadi Salim, Andy Gospodarek, Florian Fainelli, Roopa Prabhu.
 2) Start making the networking operate on IOV iterators instead of modifying iov objects in-situ during transfers. Thanks to Al Viro and Herbert Xu.
 3) A set of new netlink interfaces for the TIPC stack, from Richard Alpe.
 4) Remove unnecessary looping during ipv6 routing lookups, from Martin KaFai Lau.
 5) Add PAUSE frame generation support to gianfar driver, from Matei Pavaluca.
 6) Allow for larger reordering levels in TCP, which are easily achievable in the real world right now, from Eric Dumazet.
 7) Add a variant of napi_schedule that doesn't need to disable cpu interrupts, from Eric Dumazet.
 8) Use a doubly linked list to optimize neigh_parms_release(), from Nicolas Dichtel.
 9) Various enhancements to the kernel BPF verifier, and allow eBPF programs to actually be attached to sockets. From Alexei Starovoitov.
10) Support TSO/LSO in sunvnet driver, from David L Stevens.
11) Allow controlling ECN usage via routing metrics, from Florian Westphal.
12) Remote checksum offload, from Tom Herbert.
13) Add split-header receive, BQL, and xmit_more support to amd-xgbe driver, from Thomas Lendacky.
14) Add MPLS support to openvswitch, from Simon Horman.
15) Support wildcard tunnel endpoints in ipv6 tunnels, from Steffen Klassert.
16) Do gro flushes on a per-device basis using a timer, from Eric Dumazet. This tries to resolve the conflicting goals between the desired handling of bulk vs. RPC-like traffic.
17) Allow userspace to ask which CPU a packet was received/steered on, via SO_INCOMING_CPU. From Eric Dumazet.
18) Limit GSO packets to half the current congestion window, from Eric Dumazet.
19) Add a generic helper so that all drivers set their RSS keys in a consistent way, from Eric Dumazet.
20) Add xmit_more support to enic driver, from Govindarajulu Varadarajan.
21) Add VLAN packet scheduler action, from Jiri Pirko.
22) Support configurable RSS hash functions via ethtool, from Eyal Perry.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1820 commits)
  Fix race condition between vxlan_sock_add and vxlan_sock_release
  net/macb: fix compilation warning for print_hex_dump() called with skb->mac_header
  net/mlx4: Add support for A0 steering
  net/mlx4: Refactor QUERY_PORT
  net/mlx4_core: Add explicit error message when rule doesn't meet configuration
  net/mlx4: Add A0 hybrid steering
  net/mlx4: Add mlx4_bitmap zone allocator
  net/mlx4: Add a check if there are too many reserved QPs
  net/mlx4: Change QP allocation scheme
  net/mlx4_core: Use tasklet for user-space CQ completion events
  net/mlx4_core: Mask out host side virtualization features for guests
  net/mlx4_en: Set csum level for encapsulated packets
  be2net: Export tunnel offloads only when a VxLAN tunnel is created
  gianfar: Fix dma check map error when DMA_API_DEBUG is enabled
  cxgb4/csiostor: Don't use MASTER_MUST for fw_hello call
  net: fec: only enable mdio interrupt before phy device link up
  net: fec: clear all interrupt events to support i.MX6SX
  net: fec: reset fep link status in suspend function
  net: sock: fix access via invalid file descriptor
  net: introduce helper macro for_each_cmsghdr
  ...
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- net/ipv4/udp.c 210
1 files changed, 114 insertions, 96 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cd0db5471bb5..13b4dcf86ef6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -144,7 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
- sk_nulls_for_each(sk2, node, &hslot->head)
+ sk_nulls_for_each(sk2, node, &hslot->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(bitmap || udp_sk(sk2)->udp_port_hash == num) &&
@@ -152,14 +152,13 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
- !uid_eq(uid, sock_i_uid(sk2))) &&
- (*saddr_comp)(sk, sk2)) {
- if (bitmap)
- __set_bit(udp_sk(sk2)->udp_port_hash >> log,
- bitmap);
- else
+ !uid_eq(uid, sock_i_uid(sk2))) &&
+ saddr_comp(sk, sk2)) {
+ if (!bitmap)
return 1;
+ __set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap);
}
+ }
return 0;
}
@@ -168,10 +167,10 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
* can insert/delete a socket with local_port == num
*/
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
- struct udp_hslot *hslot2,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2))
+ struct udp_hslot *hslot2,
+ struct sock *sk,
+ int (*saddr_comp)(const struct sock *sk1,
+ const struct sock *sk2))
{
struct sock *sk2;
struct hlist_nulls_node *node;
@@ -179,7 +178,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
int res = 0;
spin_lock(&hslot2->lock);
- udp_portaddr_for_each_entry(sk2, node, &hslot2->head)
+ udp_portaddr_for_each_entry(sk2, node, &hslot2->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(udp_sk(sk2)->udp_port_hash == num) &&
@@ -187,11 +186,12 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
- !uid_eq(uid, sock_i_uid(sk2))) &&
- (*saddr_comp)(sk, sk2)) {
+ !uid_eq(uid, sock_i_uid(sk2))) &&
+ saddr_comp(sk, sk2)) {
res = 1;
break;
}
+ }
spin_unlock(&hslot2->lock);
return res;
}
@@ -206,8 +206,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
* with NULL address
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2),
+ int (*saddr_comp)(const struct sock *sk1,
+ const struct sock *sk2),
unsigned int hash2_nulladdr)
{
struct udp_hslot *hslot, *hslot2;
@@ -336,38 +336,45 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
}
-static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
- unsigned short hnum,
- __be16 sport, __be32 daddr, __be16 dport, int dif)
+static inline int compute_score(struct sock *sk, struct net *net,
+ __be32 saddr, unsigned short hnum, __be16 sport,
+ __be32 daddr, __be16 dport, int dif)
{
- int score = -1;
+ int score;
+ struct inet_sock *inet;
- if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
- !ipv6_only_sock(sk)) {
- struct inet_sock *inet = inet_sk(sk);
+ if (!net_eq(sock_net(sk), net) ||
+ udp_sk(sk)->udp_port_hash != hnum ||
+ ipv6_only_sock(sk))
+ return -1;
- score = (sk->sk_family == PF_INET ? 2 : 1);
- if (inet->inet_rcv_saddr) {
- if (inet->inet_rcv_saddr != daddr)
- return -1;
- score += 4;
- }
- if (inet->inet_daddr) {
- if (inet->inet_daddr != saddr)
- return -1;
- score += 4;
- }
- if (inet->inet_dport) {
- if (inet->inet_dport != sport)
- return -1;
- score += 4;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
- score += 4;
- }
+ score = (sk->sk_family == PF_INET) ? 2 : 1;
+ inet = inet_sk(sk);
+
+ if (inet->inet_rcv_saddr) {
+ if (inet->inet_rcv_saddr != daddr)
+ return -1;
+ score += 4;
+ }
+
+ if (inet->inet_daddr) {
+ if (inet->inet_daddr != saddr)
+ return -1;
+ score += 4;
+ }
+
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
+ return -1;
+ score += 4;
+ }
+
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score += 4;
}
+
return score;
}
@@ -378,33 +385,39 @@ static inline int compute_score2(struct sock *sk, struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif)
{
- int score = -1;
+ int score;
+ struct inet_sock *inet;
+
+ if (!net_eq(sock_net(sk), net) ||
+ ipv6_only_sock(sk))
+ return -1;
- if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) {
- struct inet_sock *inet = inet_sk(sk);
+ inet = inet_sk(sk);
- if (inet->inet_rcv_saddr != daddr)
+ if (inet->inet_rcv_saddr != daddr ||
+ inet->inet_num != hnum)
+ return -1;
+
+ score = (sk->sk_family == PF_INET) ? 2 : 1;
+
+ if (inet->inet_daddr) {
+ if (inet->inet_daddr != saddr)
return -1;
- if (inet->inet_num != hnum)
+ score += 4;
+ }
+
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
return -1;
+ score += 4;
+ }
- score = (sk->sk_family == PF_INET ? 2 : 1);
- if (inet->inet_daddr) {
- if (inet->inet_daddr != saddr)
- return -1;
- score += 4;
- }
- if (inet->inet_dport) {
- if (inet->inet_dport != sport)
- return -1;
- score += 4;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
- score += 4;
- }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score += 4;
}
+
return score;
}
@@ -1036,7 +1049,7 @@ back_from_confirm:
/* Lockless fast path for the non-corking case. */
if (!corkreq) {
- skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
+ skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc, &rt,
msg->msg_flags);
err = PTR_ERR(skb);
@@ -1051,7 +1064,7 @@ back_from_confirm:
/* ... which is an evident application bug. --ANK */
release_sock(sk);
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n"));
+ net_dbg_ratelimited("cork app bug 2\n");
err = -EINVAL;
goto out;
}
@@ -1067,7 +1080,7 @@ back_from_confirm:
do_append_data:
up->len += ulen;
- err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
+ err = ip_append_data(sk, fl4, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc, &rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
@@ -1133,7 +1146,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
if (unlikely(!up->pending)) {
release_sock(sk);
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n"));
+ net_dbg_ratelimited("udp cork app bug 3\n");
return -EINVAL;
}
@@ -1281,12 +1294,11 @@ try_again:
}
if (skb_csum_unnecessary(skb))
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
+ msg, copied);
else {
- err = skb_copy_and_csum_datagram_iovec(skb,
- sizeof(struct udphdr),
- msg->msg_iov);
+ err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr),
+ msg);
if (err == -EINVAL)
goto csum_copy_err;
@@ -1445,6 +1457,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (inet_sk(sk)->inet_daddr) {
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
+ sk_incoming_cpu_update(sk);
}
rc = sock_queue_rcv_skb(sk, skb);
@@ -1546,8 +1559,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
* provided by the application."
*/
if (up->pcrlen == 0) { /* full coverage was set */
- LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n",
- UDP_SKB_CB(skb)->cscov, skb->len);
+ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
+ UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
/* The next case involves violating the min. coverage requested
@@ -1557,8 +1570,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
* Therefore the above ...()->partial_cov statement is essential.
*/
if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
- LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
+ UDP_SKB_CB(skb)->cscov, up->pcrlen);
goto drop;
}
}
@@ -1647,7 +1660,8 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udphdr *uh,
__be32 saddr, __be32 daddr,
- struct udp_table *udptable)
+ struct udp_table *udptable,
+ int proto)
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
struct hlist_nulls_node *node;
@@ -1656,6 +1670,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
int dif = skb->dev->ifindex;
unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+ bool inner_flushed = false;
if (use_hash2) {
hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
@@ -1674,6 +1689,7 @@ start_lookup:
dif, hnum)) {
if (unlikely(count == ARRAY_SIZE(stack))) {
flush_stack(stack, count, skb, ~0);
+ inner_flushed = true;
count = 0;
}
stack[count++] = sk;
@@ -1695,7 +1711,10 @@ start_lookup:
if (count) {
flush_stack(stack, count, skb, count - 1);
} else {
- kfree_skb(skb);
+ if (!inner_flushed)
+ UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
+ consume_skb(skb);
}
return 0;
}
@@ -1777,14 +1796,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (ret > 0)
return -ret;
return 0;
- } else {
- if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return __udp4_lib_mcast_deliver(net, skb, uh,
- saddr, daddr, udptable);
-
- sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
}
+ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+ return __udp4_lib_mcast_deliver(net, skb, uh,
+ saddr, daddr, udptable, proto);
+
+ sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk != NULL) {
int ret;
@@ -1822,11 +1840,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
return 0;
short_packet:
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
- proto == IPPROTO_UDPLITE ? "Lite" : "",
- &saddr, ntohs(uh->source),
- ulen, skb->len,
- &daddr, ntohs(uh->dest));
+ net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
+ proto == IPPROTO_UDPLITE ? "Lite" : "",
+ &saddr, ntohs(uh->source),
+ ulen, skb->len,
+ &daddr, ntohs(uh->dest));
goto drop;
csum_error:
@@ -1834,10 +1852,10 @@ csum_error:
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
- proto == IPPROTO_UDPLITE ? "Lite" : "",
- &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
- ulen);
+ net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
+ proto == IPPROTO_UDPLITE ? "Lite" : "",
+ &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
+ ulen);
UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
@@ -2027,7 +2045,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
} else {
up->corkflag = 0;
lock_sock(sk);
- (*push_pending_frames)(sk);
+ push_pending_frames(sk);
release_sock(sk);
}
break;