From 7d3d43dab4e978d8d9ad1acf8af15c9b1c4b0f0f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Apr 2012 15:33:35 +0000 Subject: net: In unregister_netdevice_notifier unregister the netdevices. We already synthesize events in register_netdevice_notifier and synthesizing events in unregister_netdevice_notifier allows to us remove the need for special case cleanup code. This change should be safe as it adds no new cases for existing callers of unregiser_netdevice_notifier to handle. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c25d453b280..9bb8f87c4cd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1409,14 +1409,34 @@ EXPORT_SYMBOL(register_netdevice_notifier); * register_netdevice_notifier(). The notifier is unlinked into the * kernel structures and may then be reused. A negative errno code * is returned on a failure. + * + * After unregistering unregister and down device events are synthesized + * for all devices on the device list to the removed notifier to remove + * the need for special case cleanup code. */ int unregister_netdevice_notifier(struct notifier_block *nb) { + struct net_device *dev; + struct net *net; int err; rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_chain, nb); + if (err) + goto unlock; + + for_each_net(net) { + for_each_netdev(net, dev) { + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); + } + } +unlock: rtnl_unlock(); return err; } -- cgit v1.2.3 From 03478756b1b9298686ca9c8793e484ae39eb4649 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Apr 2012 15:35:39 +0000 Subject: phonet: Sort out initiailziation and cleanup code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently an oops was reported in phonet if there was a failure during network namespace creation. [ 163.733755] ------------[ cut here ]------------ [ 163.734501] kernel BUG at include/net/netns/generic.h:45! [ 163.734501] invalid opcode: 0000 [#1] PREEMPT SMP [ 163.734501] CPU 2 [ 163.734501] Pid: 19145, comm: trinity Tainted: G W 3.4.0-rc1-next-20120405-sasha-dirty #57 [ 163.734501] RIP: 0010:[] [] phonet_pernet+0x182/0x1a0 [ 163.734501] RSP: 0018:ffff8800674d5ca8 EFLAGS: 00010246 [ 163.734501] RAX: 000000003fffffff RBX: 0000000000000000 RCX: ffff8800678c88d8 [ 163.734501] RDX: 00000000003f4000 RSI: ffff8800678c8910 RDI: 0000000000000282 [ 163.734501] RBP: ffff8800674d5cc8 R08: 0000000000000000 R09: 0000000000000000 [ 163.734501] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880068bec920 [ 163.734501] R13: ffffffff836b90c0 R14: 0000000000000000 R15: 0000000000000000 [ 163.734501] FS: 00007f055e8de700(0000) GS:ffff88007d000000(0000) knlGS:0000000000000000 [ 163.734501] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 163.734501] CR2: 00007f055e6bb518 CR3: 0000000070c16000 CR4: 00000000000406e0 [ 163.734501] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 163.734501] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 163.734501] Process trinity (pid: 19145, threadinfo ffff8800674d4000, task ffff8800678c8000) [ 163.734501] Stack: [ 163.734501] ffffffff824d5f00 ffffffff810e2ec1 ffff880067ae0000 00000000ffffffd4 [ 163.734501] ffff8800674d5cf8 ffffffff824d667a ffff880067ae0000 00000000ffffffd4 [ 163.734501] ffffffff836b90c0 0000000000000000 ffff8800674d5d18 ffffffff824d707d [ 163.734501] Call Trace: [ 163.734501] [] ? phonet_pernet+0x20/0x1a0 [ 163.734501] [] ? get_parent_ip+0x11/0x50 [ 163.734501] [] phonet_device_destroy+0x1a/0x100 [ 163.734501] [] phonet_device_notify+0x3d/0x50 [ 163.734501] [] notifier_call_chain+0xee/0x130 [ 163.734501] [] raw_notifier_call_chain+0x11/0x20 [ 163.734501] [] call_netdevice_notifiers+0x52/0x60 [ 163.734501] [] rollback_registered_many+0x185/0x270 [ 163.734501] [] unregister_netdevice_many+0x14/0x60 [ 163.734501] [] ipip_exit_net+0x1b3/0x1d0 [ 163.734501] [] ? ipip_rcv+0x420/0x420 [ 163.734501] [] ops_exit_list+0x35/0x70 [ 163.734501] [] setup_net+0xab/0xe0 [ 163.734501] [] copy_net_ns+0x76/0x100 [ 163.734501] [] create_new_namespaces+0xfb/0x190 [ 163.734501] [] unshare_nsproxy_namespaces+0x61/0x80 [ 163.734501] [] sys_unshare+0xff/0x290 [ 163.734501] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 163.734501] [] system_call_fastpath+0x16/0x1b [ 163.734501] Code: e0 c3 fe 66 0f 1f 44 00 00 48 c7 c2 40 60 4d 82 be 01 00 00 00 48 c7 c7 80 d1 23 83 e8 48 2a c4 fe e8 73 06 c8 fe 48 85 db 75 0e <0f> 0b 0f 1f 40 00 eb fe 66 0f 1f 44 00 00 48 83 c4 10 48 89 d8 [ 163.734501] RIP [] phonet_pernet+0x182/0x1a0 [ 163.734501] RSP [ 163.861289] ---[ end trace fb5615826c548066 ]--- After investigation it turns out there were two issues. 1) Phonet was not implementing network devices but was using register_pernet_device instead of register_pernet_subsys. This was allowing there to be cases when phonenet was not initialized and the phonet net_generic was not set for a network namespace when network device events were being reported on the netdevice_notifier for a network namespace leading to the oops above. 2) phonet_exit_net was implementing a confusing and special case of handling all network devices from going away that it was hard to see was correct, and would only occur when the phonet module was removed. Now that unregister_netdevice_notifier has been modified to synthesize unregistration events for the network devices that are extant when called this confusing special case in phonet_exit_net is no longer needed. Signed-off-by: Eric W. Biederman Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 9b9a85ecc4c..bf5cf69c820 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -331,23 +331,6 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { - struct phonet_net *pnn = phonet_pernet(net); - struct net_device *dev; - unsigned i; - - rtnl_lock(); - for_each_netdev(net, dev) - phonet_device_destroy(dev); - - for (i = 0; i < 64; i++) { - dev = pnn->routes.table[i]; - if (dev) { - rtm_phonet_notify(RTM_DELROUTE, dev, i); - dev_put(dev); - } - } - rtnl_unlock(); - proc_net_remove(net, "phonet"); } @@ -361,7 +344,7 @@ static struct pernet_operations phonet_net_ops = { /* Initialize Phonet devices list */ int __init phonet_device_init(void) { - int err = register_pernet_device(&phonet_net_ops); + int err = register_pernet_subsys(&phonet_net_ops); if (err) return err; @@ -377,7 +360,7 @@ void phonet_device_exit(void) { rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); - unregister_pernet_device(&phonet_net_ops); + unregister_pernet_subsys(&phonet_net_ops); proc_net_remove(&init_net, "pnresource"); } -- cgit v1.2.3 From 89eb06f11c314c2ab4ec59039715dc021933a7a0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Apr 2012 22:41:10 +0000 Subject: net/key/af_key.c: add missing kfree_skb At the point of this error-handling code, alloc_skb has succeded, so free the resulting skb by jumping to the err label. Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 11dbb2255cc..7e5d927b576 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3480,7 +3480,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* Addresses to be used by KM for negotiation, if ext is available */ if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) - return -EINVAL; + goto err; /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); -- cgit v1.2.3 From d1f224ae186b834af647661ffaf403a817c050ce Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 10 Apr 2012 00:10:42 +0000 Subject: l2tp: fix refcount leak in l2tp_ip sockets The l2tp_ip socket close handler does not update the module refcount correctly which prevents module unload after the first bind() call on an L2TPv3 IP encapulation socket. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 55670ec3cd0..b56be1452f8 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -232,7 +232,7 @@ static void l2tp_ip_close(struct sock *sk, long timeout) { write_lock_bh(&l2tp_ip_lock); hlist_del_init(&sk->sk_bind_node); - hlist_del_init(&sk->sk_node); + sk_del_node_init(sk); write_unlock_bh(&l2tp_ip_lock); sk_common_release(sk); } -- cgit v1.2.3 From c9be48dc8bb22f1f6e6ff1560b2b28e925a0b815 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 10 Apr 2012 00:10:43 +0000 Subject: l2tp: don't overwrite source address in l2tp_ip_bind() Applications using L2TP/IP sockets want to be able to bind() an L2TP/IP socket to set the local tunnel id while leaving the auto-assigned source address alone. So if no source address is supplied, don't overwrite the address already stored in the socket. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b56be1452f8..585d93ecee2 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -271,7 +271,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; - inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr; + if (addr->l2tp_addr.s_addr) + inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); -- cgit v1.2.3 From 5c699fb7d88d360023f3b3f5291cbf5b59883a1b Mon Sep 17 00:00:00 2001 From: Tomasz Gregorek Date: Thu, 12 Apr 2012 08:18:07 +0000 Subject: caif: Fix memory leakage in the chnl_net.c. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added kfree_skb() calls in the chnk_net.c file on the error paths. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 20618dd3088..d09340e1523 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -103,6 +103,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) skb->protocol = htons(ETH_P_IPV6); break; default: + kfree_skb(skb); priv->netdev->stats.rx_errors++; return -EINVAL; } @@ -220,14 +221,16 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->len > priv->netdev->mtu) { pr_warn("Size of skb exceeded MTU\n"); + kfree_skb(skb); dev->stats.tx_errors++; - return -ENOSPC; + return NETDEV_TX_OK; } if (!priv->flowenabled) { pr_debug("dropping packets flow off\n"); + kfree_skb(skb); dev->stats.tx_dropped++; - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) @@ -242,7 +245,7 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev) result = priv->chnl.dn->transmit(priv->chnl.dn, pkt); if (result) { dev->stats.tx_dropped++; - return result; + return NETDEV_TX_OK; } /* Update statistics. */ -- cgit v1.2.3 From 1716a96101c49186bb0b8491922fd3e69030235f Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 6 Apr 2012 00:13:10 +0000 Subject: ipv6: fix problem with expired dst cache If the ipv6 dst cache which copy from the dst generated by ICMPV6 RA packet. this dst cache will not check expire because it has no RTF_EXPIRES flag. So this dst cache will always be used until the dst gc run. Change the struct dst_entry,add a union contains new pointer from and expires. When rt6_info.rt6i_flags has no RTF_EXPIRES flag,the dst.expires has no use. we can use this field to point to where the dst cache copy from. The dst.from is only used in IPV6. rt6_check_expired check if rt6_info.dst.from is expired. ip6_rt_copy only set dst.from when the ort has flag RTF_ADDRCONF and RTF_DEFAULT.then hold the ort. ip6_dst_destroy release the ort. Add some functions to operate the RTF_EXPIRES flag and expires(from) together. and change the code to use these new adding functions. Changes from v5: modify ip6_route_add and ndisc_router_discovery to use new adding functions. Only set dst.from when the ort has flag RTF_ADDRCONF and RTF_DEFAULT.then hold the ort. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/net/dst.h | 6 ++++- include/net/ip6_fib.h | 42 ++++++++++++++++++++++++++++++ net/ipv6/addrconf.c | 9 +++---- net/ipv6/ip6_fib.c | 9 +++---- net/ipv6/ndisc.c | 3 +-- net/ipv6/route.c | 71 +++++++++++++++++++++++++++++++-------------------- 6 files changed, 99 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/include/net/dst.h b/include/net/dst.h index 59c5d18cc38..ff4da42fcfc 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -36,7 +36,11 @@ struct dst_entry { struct net_device *dev; struct dst_ops *ops; unsigned long _metrics; - unsigned long expires; + union { + unsigned long expires; + /* point to where the dst_entry copied from */ + struct dst_entry *from; + }; struct dst_entry *path; struct neighbour __rcu *_neighbour; #ifdef CONFIG_XFRM diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index b26bb810198..c64778fd5e1 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -123,6 +123,48 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) return ((struct rt6_info *)dst)->rt6i_idev; } +static inline void rt6_clean_expires(struct rt6_info *rt) +{ + if (!(rt->rt6i_flags & RTF_EXPIRES) && rt->dst.from) + dst_release(rt->dst.from); + + rt->rt6i_flags &= ~RTF_EXPIRES; + rt->dst.expires = 0; +} + +static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) +{ + if (!(rt->rt6i_flags & RTF_EXPIRES) && rt->dst.from) + dst_release(rt->dst.from); + + rt->rt6i_flags |= RTF_EXPIRES; + rt->dst.expires = expires; +} + +static inline void rt6_update_expires(struct rt6_info *rt, int timeout) +{ + if (!(rt->rt6i_flags & RTF_EXPIRES) && rt->dst.from) + dst_release(rt->dst.from); + + dst_set_expires(&rt->dst, timeout); + rt->rt6i_flags |= RTF_EXPIRES; +} + +static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) +{ + struct dst_entry *new = (struct dst_entry *) from; + + if (!(rt->rt6i_flags & RTF_EXPIRES) && rt->dst.from) { + if (new == rt->dst.from) + return; + dst_release(rt->dst.from); + } + + rt->rt6i_flags &= ~RTF_EXPIRES; + rt->dst.from = new; + dst_hold(new); +} + struct fib6_walker_t { struct list_head lh; struct fib6_node *root, *node; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6a3bb6077e1..7d5cb975cc6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -803,8 +803,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ip6_del_rt(rt); rt = NULL; } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { - rt->dst.expires = expires; - rt->rt6i_flags |= RTF_EXPIRES; + rt6_set_expires(rt, expires); } } dst_release(&rt->dst); @@ -1887,11 +1886,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ - rt->dst.expires = jiffies + rt_expires; - rt->rt6i_flags |= RTF_EXPIRES; + rt6_set_expires(rt, jiffies + rt_expires); } else { - rt->rt6i_flags &= ~RTF_EXPIRES; - rt->dst.expires = 0; + rt6_clean_expires(rt); } } else if (valid_lft) { clock_t expires = 0; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5b27fbcae34..93717435013 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -673,11 +673,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, &rt->rt6i_gateway)) { if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; - iter->dst.expires = rt->dst.expires; - if (!(rt->rt6i_flags & RTF_EXPIRES)) { - iter->rt6i_flags &= ~RTF_EXPIRES; - iter->dst.expires = 0; - } + if (!(rt->rt6i_flags & RTF_EXPIRES)) + rt6_clean_expires(iter); + else + rt6_set_expires(iter, rt->dst.expires); return -EEXIST; } } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3dcdb81ec3e..176b469322a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1264,8 +1264,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (rt) - rt->dst.expires = jiffies + (HZ * lifetime); - + rt6_set_expires(rt, jiffies + (HZ * lifetime)); if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3992e26a603..bc4888d902b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -62,7 +62,7 @@ #include #endif -static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, +static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); @@ -285,6 +285,10 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } + + if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) + dst_release(dst->from); + if (peer) { rt->rt6i_peer = NULL; inet_putpeer(peer); @@ -329,8 +333,17 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static __inline__ int rt6_check_expired(const struct rt6_info *rt) { - return (rt->rt6i_flags & RTF_EXPIRES) && - time_after(jiffies, rt->dst.expires); + struct rt6_info *ort = NULL; + + if (rt->rt6i_flags & RTF_EXPIRES) { + if (time_after(jiffies, rt->dst.expires)) + return 1; + } else if (rt->dst.from) { + ort = (struct rt6_info *) rt->dst.from; + return (ort->rt6i_flags & RTF_EXPIRES) && + time_after(jiffies, ort->dst.expires); + } + return 0; } static inline int rt6_need_strict(const struct in6_addr *daddr) @@ -620,12 +633,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (!addrconf_finite_timeout(lifetime)) { - rt->rt6i_flags &= ~RTF_EXPIRES; - } else { - rt->dst.expires = jiffies + HZ * lifetime; - rt->rt6i_flags |= RTF_EXPIRES; - } + if (!addrconf_finite_timeout(lifetime)) + rt6_clean_expires(rt); + else + rt6_set_expires(rt, jiffies + HZ * lifetime); + dst_release(&rt->dst); } return 0; @@ -730,7 +742,7 @@ int ip6_ins_rt(struct rt6_info *rt) return __ip6_ins_rt(rt, &info); } -static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, +static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -954,10 +966,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); - rt->dst.expires = 0; rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; + rt->rt6i_flags = ort->rt6i_flags; + rt6_clean_expires(rt); rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -1019,10 +1031,9 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) { - dst_set_expires(&rt->dst, 0); - rt->rt6i_flags |= RTF_EXPIRES; - } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) + rt6_update_expires(rt, 0); + else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) rt->rt6i_node->fn_sernum = -1; } } @@ -1289,9 +1300,12 @@ int ip6_route_add(struct fib6_config *cfg) } rt->dst.obsolete = -1; - rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ? - jiffies + clock_t_to_jiffies(cfg->fc_expires) : - 0; + + if (cfg->fc_flags & RTF_EXPIRES) + rt6_set_expires(rt, jiffies + + clock_t_to_jiffies(cfg->fc_expires)); + else + rt6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -1736,8 +1750,8 @@ again: features |= RTAX_FEATURE_ALLFRAG; dst_metric_set(&rt->dst, RTAX_FEATURES, features); } - dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); - rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; + rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); + rt->rt6i_flags |= RTF_MODIFIED; goto out; } @@ -1765,9 +1779,8 @@ again: * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); - nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; - + rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); + nrt->rt6i_flags |= RTF_DYNAMIC; ip6_ins_rt(nrt); } out: @@ -1799,7 +1812,7 @@ void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *sad * Misc support functions */ -static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, +static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); @@ -1819,10 +1832,14 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->dst.expires = 0; rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; + rt->rt6i_flags = ort->rt6i_flags; + if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == + (RTF_DEFAULT | RTF_ADDRCONF)) + rt6_set_from(rt, ort); + else + rt6_clean_expires(rt); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES -- cgit v1.2.3 From e55a4046dab28c440c96890bdddcf02dc8981f2d Mon Sep 17 00:00:00 2001 From: Lukasz Kucharczyk Date: Wed, 11 Apr 2012 14:55:10 +0200 Subject: cfg80211: fix interface combinations check. Signed-off-by: Lukasz Kucharczyk Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 1b7a08df933..957f2562161 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -989,7 +989,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, if (rdev->wiphy.software_iftypes & BIT(iftype)) continue; for (j = 0; j < c->n_limits; j++) { - if (!(limits[j].types & iftype)) + if (!(limits[j].types & BIT(iftype))) continue; if (limits[j].max < num[iftype]) goto cont; -- cgit v1.2.3 From 244b65dbfede788f2fa3fe2463c44d0809e97c6b Mon Sep 17 00:00:00 2001 From: David Ward Date: Sun, 15 Apr 2012 12:31:45 +0000 Subject: net_sched: gred: Fix oops in gred_dump() in WRED mode A parameter set exists for WRED mode, called wred_set, to hold the same values for qavg and qidlestart across all VQs. The WRED mode values had been previously held in the VQ for the default DP. After these values were moved to wred_set, the VQ for the default DP was no longer created automatically (so that it could be omitted on purpose, to have packets in the default DP enqueued directly to the device without using RED). However, gred_dump() was overlooked during that change; in WRED mode it still reads qavg/qidlestart from the VQ for the default DP, which might not even exist. As a result, this command sequence will cause an oops: tc qdisc add dev $DEV handle $HANDLE parent $PARENT gred setup \ DPs 3 default 2 grio tc qdisc change dev $DEV handle $HANDLE gred DP 0 prio 8 $RED_OPTIONS tc qdisc change dev $DEV handle $HANDLE gred DP 1 prio 8 $RED_OPTIONS This fixes gred_dump() in WRED mode to use the values held in wred_set. Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 0b15236be7b..8179494c269 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -565,11 +565,8 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) opt.packets = q->packetsin; opt.bytesin = q->bytesin; - if (gred_wred_mode(table)) { - q->vars.qidlestart = - table->tab[table->def]->vars.qidlestart; - q->vars.qavg = table->tab[table->def]->vars.qavg; - } + if (gred_wred_mode(table)) + gred_load_wred_set(table, q); opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg); -- cgit v1.2.3 From 973ef21a676e55a8e1a100a6e109f0c116ea75e8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 16 Apr 2012 14:56:48 +0200 Subject: mac80211: fix truncated packets in cooked monitor rx Cooked monitor rx was recently changed to use ieee80211_add_rx_radiotap_header instead of generating only limited radiotap information. ieee80211_add_rx_radiotap_header assumes that FCS info is still present if the hardware supports receiving it, however when cooked monitor rx packets are processed, FCS info has already been stripped. Fix this by adding an extra flag indicating FCS presence. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/rx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bcfe8c77c83..d64e285400a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -103,7 +103,7 @@ static void ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_rate *rate, - int rtap_len) + int rtap_len, bool has_fcs) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_header *rthdr; @@ -134,7 +134,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, } /* IEEE80211_RADIOTAP_FLAGS */ - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) + if (has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)) *pos |= IEEE80211_RADIOTAP_F_FCS; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) *pos |= IEEE80211_RADIOTAP_F_BADFCS; @@ -294,7 +294,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, } /* prepend radiotap information */ - ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom); + ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom, + true); skb_reset_mac_header(skb); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -2571,7 +2572,8 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, goto out_free_skb; /* prepend radiotap information */ - ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom); + ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom, + false); skb_set_mac_header(skb, 0); skb->ip_summed = CHECKSUM_UNNECESSARY; -- cgit v1.2.3 From 6741e7f048dacc92e37c5d724ff5c64e45f6c2c9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 16 Apr 2012 22:10:42 +0200 Subject: mac80211: fix logic error in ibss channel type check The broken check leads to rate control attempting to use HT40 while the driver is configured for HT20. This leads to interesting hardware issues. HT40 can only be used if the channel type is either HT40- or HT40+ and if the channel type of the cell matches the local type. Signed-off-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 33fd8d9f714..cef7c29214a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -457,8 +457,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * fall back to HT20 if we don't use or use * the other extension channel */ - if ((channel_type == NL80211_CHAN_HT40MINUS || - channel_type == NL80211_CHAN_HT40PLUS) && + if (!(channel_type == NL80211_CHAN_HT40MINUS || + channel_type == NL80211_CHAN_HT40PLUS) || channel_type != sdata->u.ibss.channel_type) sta_ht_cap_new.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; -- cgit v1.2.3 From 4d846f02392a710f9604892ac3329e628e60a230 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Apr 2012 23:28:07 +0000 Subject: tcp: fix tcp_grow_window() for large incoming frames tcp_grow_window() has to grow rcv_ssthresh up to window_clamp, allowing sender to increase its window. tcp_grow_window() still assumes a tcp frame is under MSS, but its no longer true with LRO/GRO. This patch fixes one of the performance issue we noticed with GRO on. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9944c1d9a21..3ff36406537 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -335,6 +335,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) incr = __tcp_grow_window(sk, skb); if (incr) { + incr = max_t(int, incr, 2 * skb->len); tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); inet_csk(sk)->icsk_ack.quick |= 1; -- cgit v1.2.3 From b922934d017f1cc831b017913ed7d1a56c558b43 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 16 Apr 2012 04:43:15 +0000 Subject: netns: do not leak net_generic data on failed init ops_init should free the net_generic data on init failure and __register_pernet_operations should not call ops_free when NET_NS is not enabled. Signed-off-by: Julian Anastasov Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/net_namespace.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0e950fda9a0..31a5ae51a45 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -83,21 +83,29 @@ assign: static int ops_init(const struct pernet_operations *ops, struct net *net) { - int err; + int err = -ENOMEM; + void *data = NULL; + if (ops->id && ops->size) { - void *data = kzalloc(ops->size, GFP_KERNEL); + data = kzalloc(ops->size, GFP_KERNEL); if (!data) - return -ENOMEM; + goto out; err = net_assign_generic(net, *ops->id, data); - if (err) { - kfree(data); - return err; - } + if (err) + goto cleanup; } + err = 0; if (ops->init) - return ops->init(net); - return 0; + err = ops->init(net); + if (!err) + return 0; + +cleanup: + kfree(data); + +out: + return err; } static void ops_free(const struct pernet_operations *ops, struct net *net) @@ -448,12 +456,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - int err = 0; - err = ops_init(ops, &init_net); - if (err) - ops_free(ops, &init_net); - return err; - + return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) -- cgit v1.2.3 From adae0fe0ea87e8fb1a72dde304937c60759b495f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 5 Apr 2012 21:04:37 +0400 Subject: SUNRPC: register PipeFS file system after pernet sybsystem PipeFS superblock creation routine relays on SUNRPC pernet data presense, which is created on register_pernet_subsys() call in SUNRPC module init function. Registering of PipeFS filesystem prior to registering of per-net subsystem leads to races (mount of PipeFS can dereference uninitialized data). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/sunrpc_syms.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8adfc88e793..3d6498af9ad 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -75,19 +75,20 @@ static struct pernet_operations sunrpc_net_ops = { static int __init init_sunrpc(void) { - int err = register_rpc_pipefs(); + int err = rpc_init_mempool(); if (err) goto out; - err = rpc_init_mempool(); - if (err) - goto out2; err = rpcauth_init_module(); if (err) - goto out3; + goto out2; cache_initialize(); err = register_pernet_subsys(&sunrpc_net_ops); + if (err) + goto out3; + + err = register_rpc_pipefs(); if (err) goto out4; #ifdef RPC_DEBUG @@ -98,11 +99,11 @@ init_sunrpc(void) return 0; out4: - rpcauth_remove_module(); + unregister_pernet_subsys(&sunrpc_net_ops); out3: - rpc_destroy_mempool(); + rpcauth_remove_module(); out2: - unregister_rpc_pipefs(); + rpc_destroy_mempool(); out: return err; } -- cgit v1.2.3 From 22b4a4f22da4b39c6f7f679fd35f3d35c91bf851 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Apr 2012 10:14:23 +0000 Subject: tcp: fix retransmit of partially acked frames Alexander Beregalov reported skb_over_panic errors and provided stack trace. I occurs commit a21d45726aca (tcp: avoid order-1 allocations on wifi and tx path) added a regression, when a retransmit is done after a partial ACK. tcp_retransmit_skb() tries to aggregate several frames if the first one has enough available room to hold the following ones payload. This is controlled by /proc/sys/net/ipv4/tcp_retrans_collapse tunable (default : enabled) Problem is we must make sure _pskb_trim_head() doesnt fool skb_availroom() when pulling some bytes from skb (this pull is done when receiver ACK part of the frame). Reported-by: Alexander Beregalov Cc: Marc MERLIN Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 376b2cfbb68..7ac6423117a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1096,6 +1096,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); + skb->avail_size -= eat; len -= eat; if (!len) return; -- cgit v1.2.3 From 3adadc08cc1e2cbcc15a640d639297ef5fcb17f5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 18 Apr 2012 16:11:23 +0000 Subject: net ax25: Reorder ax25_exit to remove races. While reviewing the sysctl code in ax25 I spotted races in ax25_exit where it is possible to receive notifications and packets after already freeing up some of the data structures needed to process those notifications and updates. Call unregister_netdevice_notifier early so that the rest of the cleanup code does not need to deal with network devices. This takes advantage of my recent enhancement to unregister_netdevice_notifier to send unregister notifications of all network devices that are current registered. Move the unregistration for packet types, socket types and protocol types before we cleanup any of the ax25 data structures to remove the possibilities of other races. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 0906c194a41..9d9a6a3edbd 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -2011,16 +2011,17 @@ static void __exit ax25_exit(void) proc_net_remove(&init_net, "ax25_route"); proc_net_remove(&init_net, "ax25"); proc_net_remove(&init_net, "ax25_calls"); - ax25_rt_free(); - ax25_uid_free(); - ax25_dev_free(); - ax25_unregister_sysctl(); unregister_netdevice_notifier(&ax25_dev_notifier); + ax25_unregister_sysctl(); dev_remove_pack(&ax25_packet_type); sock_unregister(PF_AX25); proto_unregister(&ax25_proto); + + ax25_rt_free(); + ax25_uid_free(); + ax25_dev_free(); } module_exit(ax25_exit); -- cgit v1.2.3 From bbe362be5368b9f531b95a4a9b502ae2832e1dac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Apr 2012 07:16:21 +0000 Subject: drop_monitor: allow more events per second It seems there is a logic error in trace_drop_common(), since we store only 64 drops, even if they are from same location. This fix is a one liner, but we probably need more work to avoid useless atomic dec/inc Now I can watch 1 Mpps drops through dropwatch... Signed-off-by: Eric Dumazet Cc: Neil Horman Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 7f36b38e060..5c3c81a609e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -150,6 +150,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) for (i = 0; i < msg->entries; i++) { if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { msg->points[i].count++; + atomic_inc(&data->dm_hit_count); goto out; } } -- cgit v1.2.3 From d135c522f1234f62e81be29cebdf59e9955139ad Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 22 Apr 2012 09:45:47 +0000 Subject: tcp: fix TCP_MAXSEG for established IPv6 passive sockets Commit f5fff5d forgot to fix TCP_MAXSEG behavior IPv6 sockets, so IPv6 TCP server sockets that used TCP_MAXSEG would find that the advmss of child sockets would be incorrect. This commit mirrors the advmss logic from tcp_v4_syn_recv_sock in tcp_v6_syn_recv_sock. Eventually this logic should probably be shared between IPv4 and IPv6, but this at least fixes this issue. Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 86cfe6005f4..98256cf72f9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1383,6 +1383,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); + if (tcp_sk(sk)->rx_opt.user_mss && + tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) + newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + tcp_initialize_rcv_mss(newsk); if (tcp_rsk(req)->snt_synack) tcp_valid_rtt_meas(newsk, -- cgit v1.2.3