summaryrefslogtreecommitdiff
path: root/net/ipv4/ip_sockglue.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_sockglue.c')
-rw-r--r--net/ipv4/ip_sockglue.c66
1 files changed, 51 insertions, 15 deletions
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 644dc43a55d..6c40a8c46e7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -23,6 +23,7 @@
#include <linux/icmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
+#include <linux/slab.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -238,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
sent to multicast group to reach destination designated router.
*/
struct ip_ra_chain *ip_ra_chain;
-DEFINE_RWLOCK(ip_ra_lock);
+static DEFINE_SPINLOCK(ip_ra_lock);
+
+
+static void ip_ra_destroy_rcu(struct rcu_head *head)
+{
+ struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
+
+ sock_put(ra->saved_sk);
+ kfree(ra);
+}
int ip_ra_control(struct sock *sk, unsigned char on,
void (*destructor)(struct sock *))
@@ -250,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on,
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- write_lock_bh(&ip_ra_lock);
+ spin_lock_bh(&ip_ra_lock);
for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
- write_unlock_bh(&ip_ra_lock);
+ spin_unlock_bh(&ip_ra_lock);
kfree(new_ra);
return -EADDRINUSE;
}
- *rap = ra->next;
- write_unlock_bh(&ip_ra_lock);
+ /* dont let ip_call_ra_chain() use sk again */
+ ra->sk = NULL;
+ rcu_assign_pointer(*rap, ra->next);
+ spin_unlock_bh(&ip_ra_lock);
if (ra->destructor)
ra->destructor(sk);
- sock_put(sk);
- kfree(ra);
+ /*
+ * Delay sock_put(sk) and kfree(ra) after one rcu grace
+ * period. This guarantee ip_call_ra_chain() dont need
+ * to mess with socket refcounts.
+ */
+ ra->saved_sk = sk;
+ call_rcu(&ra->rcu, ip_ra_destroy_rcu);
return 0;
}
}
if (new_ra == NULL) {
- write_unlock_bh(&ip_ra_lock);
+ spin_unlock_bh(&ip_ra_lock);
return -ENOBUFS;
}
new_ra->sk = sk;
new_ra->destructor = destructor;
new_ra->next = ra;
- *rap = new_ra;
+ rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
- write_unlock_bh(&ip_ra_lock);
+ spin_unlock_bh(&ip_ra_lock);
return 0;
}
@@ -286,12 +303,8 @@ int ip_ra_control(struct sock *sk, unsigned char on,
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
- struct inet_sock *inet = inet_sk(sk);
struct sock_exterr_skb *serr;
- if (!inet->recverr)
- return;
-
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb)
return;
@@ -452,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
(1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
(1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
(1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
- (1<<IP_MINTTL))) ||
+ (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
optname == IP_MULTICAST_TTL ||
optname == IP_MULTICAST_ALL ||
optname == IP_MULTICAST_LOOP ||
@@ -575,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
inet->hdrincl = val ? 1 : 0;
break;
+ case IP_NODEFRAG:
+ if (sk->sk_type != SOCK_RAW) {
+ err = -ENOPROTOOPT;
+ break;
+ }
+ inet->nodefrag = val ? 1 : 0;
+ break;
case IP_MTU_DISCOVER:
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
goto e_inval;
@@ -957,6 +977,22 @@ e_inval:
return -EINVAL;
}
+/**
+ * ip_queue_rcv_skb - Queue an skb into sock receive queue
+ * @sk: socket
+ * @skb: buffer
+ *
+ * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
+ * is not set, we drop skb dst entry now, while dst cache line is hot.
+ */
+int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
+ skb_dst_drop(skb);
+ return sock_queue_rcv_skb(sk, skb);
+}
+EXPORT_SYMBOL(ip_queue_rcv_skb);
+
int ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
{