From 745c0ce35f904aeff8e1ea325c259a14a00ff1b7 Mon Sep 17 00:00:00 2001 From: Vishal Agarwal Date: Fri, 13 Apr 2012 17:43:22 +0530 Subject: Bluetooth: hci_persistent_key should return bool This patch changes the return type of function hci_persistent_key from int to bool because it makes more sense to return information whether a key is persistent or not as a bool. Signed-off-by: Vishal Agarwal Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 21 +++++++++++---------- net/bluetooth/mgmt.c | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2054c1321c8..c2251e4c3b7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1216,40 +1216,40 @@ struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) return NULL; } -static int hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn, +static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn, u8 key_type, u8 old_key_type) { /* Legacy key */ if (key_type < 0x03) - return 1; + return true; /* Debug keys are insecure so don't store them persistently */ if (key_type == HCI_LK_DEBUG_COMBINATION) - return 0; + return false; /* Changed combination key and there's no previous one */ if (key_type == HCI_LK_CHANGED_COMBINATION && old_key_type == 0xff) - return 0; + return false; /* Security mode 3 case */ if (!conn) - return 1; + return true; /* Neither local nor remote side had no-bonding as requirement */ if (conn->auth_type > 0x01 && conn->remote_auth > 0x01) - return 1; + return true; /* Local side had dedicated bonding as requirement */ if (conn->auth_type == 0x02 || conn->auth_type == 0x03) - return 1; + return true; /* Remote side had dedicated bonding as requirement */ if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) - return 1; + return true; /* If none of the above criteria match, then don't store the key * persistently */ - return 0; + return false; } struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, u8 rand[8]) @@ -1286,7 +1286,8 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len) { struct link_key *key, *old_key; - u8 old_key_type, persistent; + u8 old_key_type; + bool persistent; old_key = hci_find_link_key(hdev, bdaddr); if (old_key) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4ef275c6967..4bb03b11112 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2884,7 +2884,7 @@ int mgmt_write_scan_failed(struct hci_dev *hdev, u8 scan, u8 status) return 0; } -int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, u8 persistent) +int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent) { struct mgmt_ev_new_link_key ev; -- cgit v1.2.3 From 6ec5bcadc21e13ceba8c144e4731eccac01d04f7 Mon Sep 17 00:00:00 2001 From: Vishal Agarwal Date: Mon, 16 Apr 2012 14:44:44 +0530 Subject: Bluetooth: Temporary keys should be retained during connection If a key is non persistent then it should not be used in future connections but it should be kept for current connection. And it should be removed when connecion is removed. Signed-off-by: Vishal Agarwal Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 6 ++---- net/bluetooth/hci_event.c | 2 ++ 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f8577c16fcf..db1c5df4522 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -314,6 +314,7 @@ struct hci_conn { __u8 remote_cap; __u8 remote_auth; + bool flush_key; unsigned int sent; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c2251e4c3b7..a7607e4be34 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1330,10 +1330,8 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, mgmt_new_link_key(hdev, key, persistent); - if (!persistent) { - list_del(&key->list); - kfree(key); - } + if (conn) + conn->flush_key = !persistent; return 0; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index badb7851d11..6a72eaea70e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1902,6 +1902,8 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff } if (ev->status == 0) { + if (conn->type == ACL_LINK && conn->flush_key) + hci_remove_link_key(hdev, &conn->dst); hci_proto_disconn_cfm(conn, ev->reason); hci_conn_del(conn); } -- cgit v1.2.3 From a881e963c7fe1f226e991ee9bbe8907acda93294 Mon Sep 17 00:00:00 2001 From: "Peter Huang (Peng)" Date: Thu, 19 Apr 2012 20:12:51 +0000 Subject: set fake_rtable's dst to NULL to avoid kernel Oops bridge: set fake_rtable's dst to NULL to avoid kernel Oops when bridge is deleted before tap/vif device's delete, kernel may encounter an oops because of NULL reference to fake_rtable's dst. Set fake_rtable's dst to NULL before sending packets out can solve this problem. v4 reformat, change br_drop_fake_rtable(skb) to {} v3 enrich commit header v2 introducing new flag DST_FAKE_RTABLE to dst_entry struct. [ Use "do { } while (0)" for nop br_drop_fake_rtable() implementation -DaveM ] Acked-by: Eric Dumazet Signed-off-by: Peter Huang Signed-off-by: David S. Miller --- include/linux/netfilter_bridge.h | 9 +++++++++ include/net/dst.h | 1 + net/bridge/br_forward.c | 1 + net/bridge/br_netfilter.c | 8 ++------ 4 files changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 0ddd161f3b0..31d2844e657 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -104,9 +104,18 @@ struct bridge_skb_cb { } daddr; }; +static inline void br_drop_fake_rtable(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && (dst->flags & DST_FAKE_RTABLE)) + skb_dst_drop(skb); +} + #else #define nf_bridge_maybe_copy_header(skb) (0) #define nf_bridge_pad(skb) (0) +#define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ diff --git a/include/net/dst.h b/include/net/dst.h index ff4da42fcfc..bed833d9796 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,6 +59,7 @@ struct dst_entry { #define DST_NOCACHE 0x0010 #define DST_NOCOUNT 0x0020 #define DST_NOPEER 0x0040 +#define DST_FAKE_RTABLE 0x0080 short error; short obsolete; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 61f65344e71..a2098e3de50 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -47,6 +47,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); dev_queue_xmit(skb); } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index dec4f381713..d7f49b63ab0 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -156,7 +156,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->dst.dev = br->dev; rt->dst.path = &rt->dst; dst_init_metrics(&rt->dst, br_dst_default_metrics, true); - rt->dst.flags = DST_NOXFRM | DST_NOPEER; + rt->dst.flags = DST_NOXFRM | DST_NOPEER | DST_FAKE_RTABLE; rt->dst.ops = &fake_dst_ops; } @@ -694,11 +694,7 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct rtable *rt = skb_rtable(skb); - - if (rt && rt == bridge_parent_rtable(in)) - skb_dst_drop(skb); - + br_drop_fake_rtable(skb); return NF_ACCEPT; } -- cgit v1.2.3 From 16cde9931bcd8d8ca968ef1cded02684ea040374 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 13 Apr 2012 12:32:42 +0200 Subject: Bluetooth: Fix missing break in hci_cmd_complete_evt Command complete event for HCI_OP_USER_PASSKEY_NEG_REPLY would result in calling handler function also for HCI_OP_LE_SET_SCAN_PARAM. This could result in undefined behaviour. Signed-off-by: Szymon Janc Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6a72eaea70e..7f87a70b861 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2314,6 +2314,7 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk case HCI_OP_USER_PASSKEY_NEG_REPLY: hci_cc_user_passkey_neg_reply(hdev, skb); + break; case HCI_OP_LE_SET_SCAN_PARAM: hci_cc_le_set_scan_param(hdev, skb); -- cgit v1.2.3 From afa762f6871a8cb05fbef5d0f83fac14304aa816 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 23 Apr 2012 14:45:15 +0300 Subject: mac80211: call ieee80211_mgd_stop() on interface stop ieee80211_mgd_teardown() is called on netdev removal, which occurs after the vif was already removed from the low-level driver, resulting in the following warning: [ 4809.014734] ------------[ cut here ]------------ [ 4809.019861] WARNING: at net/mac80211/driver-ops.h:12 ieee80211_bss_info_change_notify+0x200/0x2c8 [mac80211]() [ 4809.030388] wlan0: Failed check-sdata-in-driver check, flags: 0x4 [ 4809.036862] Modules linked in: wlcore_sdio(-) wl12xx wlcore mac80211 cfg80211 [last unloaded: cfg80211] [ 4809.046849] [] (unwind_backtrace+0x0/0x12c) [ 4809.055937] [] (dump_stack+0x20/0x24) [ 4809.065385] [] (warn_slowpath_common+0x5c/0x74) [ 4809.075589] [] (warn_slowpath_fmt+0x40/0x48) [ 4809.088291] [] (ieee80211_bss_info_change_notify+0x200/0x2c8 [mac80211]) [ 4809.102844] [] (ieee80211_destroy_auth_data+0x80/0xa4 [mac80211]) [ 4809.116276] [] (ieee80211_mgd_teardown+0x5c/0x74 [mac80211]) [ 4809.129331] [] (ieee80211_teardown_sdata+0xb0/0xd8 [mac80211]) [ 4809.141595] [] (rollback_registered_many+0x228/0x2f0) [ 4809.153056] [] (unregister_netdevice_many+0x28/0x50) [ 4809.165696] [] (ieee80211_remove_interfaces+0xb4/0xdc [mac80211]) [ 4809.179151] [] (ieee80211_unregister_hw+0x50/0xf0 [mac80211]) [ 4809.191043] [] (wlcore_remove+0x5c/0x7c [wlcore]) [ 4809.201491] [] (platform_drv_remove+0x24/0x28) [ 4809.212029] [] (__device_release_driver+0x8c/0xcc) [ 4809.222738] [] (device_release_driver+0x30/0x3c) [ 4809.233099] [] (bus_remove_device+0x10c/0x128) [ 4809.242620] [] (device_del+0x11c/0x17c) [ 4809.252150] [] (platform_device_del+0x28/0x68) [ 4809.263051] [] (wl1271_remove+0x3c/0x50 [wlcore_sdio]) [ 4809.273590] [] (sdio_bus_remove+0x48/0xf8) [ 4809.283754] [] (__device_release_driver+0x8c/0xcc) [ 4809.293729] [] (driver_detach+0x9c/0xc4) [ 4809.303163] [] (bus_remove_driver+0xc4/0xf4) [ 4809.312973] [] (driver_unregister+0x70/0x7c) [ 4809.323220] [] (sdio_unregister_driver+0x24/0x2c) [ 4809.334213] [] (wl1271_exit+0x14/0x1c [wlcore_sdio]) [ 4809.344930] [] (sys_delete_module+0x228/0x2a8) [ 4809.354734] ---[ end trace 515290ccf5feb522 ]--- Rename ieee80211_mgd_teardown() to ieee80211_mgd_stop(), and call it on ieee80211_do_stop(). Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 4 ++-- net/mac80211/mlme.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d9798a307f2..db8fae51714 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1210,7 +1210,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); -void ieee80211_mgd_teardown(struct ieee80211_sub_if_data *sdata); +void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 401c01f0731..c20051b7ffc 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -486,6 +486,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps_bc_buf); skb_queue_purge(&sdata->u.ap.ps_bc_buf); + } else if (sdata->vif.type == NL80211_IFTYPE_STATION) { + ieee80211_mgd_stop(sdata); } if (going_down) @@ -644,8 +646,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - else if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_mgd_teardown(sdata); flushed = sta_info_flush(local, sdata); WARN_ON(flushed); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f76da5b3f5c..20c680bfc3a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3497,7 +3497,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return 0; } -void ieee80211_mgd_teardown(struct ieee80211_sub_if_data *sdata) +void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; -- cgit v1.2.3 From 7118c07a844d367560ee91adb2071bde2fabcdbf Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 14 Apr 2012 12:37:46 -0400 Subject: ipvs: Verify that IP_VS protocol has been registered The registration of a protocol might fail, there were no checks and all registrations were assumed to be correct. This lead to NULL ptr dereferences when apps tried registering. For example: [ 1293.226051] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 [ 1293.227038] IP: [] tcp_register_app+0x60/0xb0 [ 1293.227038] PGD 391de067 PUD 6c20b067 PMD 0 [ 1293.227038] Oops: 0000 [#1] PREEMPT SMP [ 1293.227038] CPU 1 [ 1293.227038] Pid: 19609, comm: trinity Tainted: G W 3.4.0-rc1-next-20120405-sasha-dirty #57 [ 1293.227038] RIP: 0010:[] [] tcp_register_app+0x60/0xb0 [ 1293.227038] RSP: 0018:ffff880038c1dd18 EFLAGS: 00010286 [ 1293.227038] RAX: ffffffffffffffc0 RBX: 0000000000001500 RCX: 0000000000010000 [ 1293.227038] RDX: 0000000000000000 RSI: ffff88003a2d5888 RDI: 0000000000000282 [ 1293.227038] RBP: ffff880038c1dd48 R08: 0000000000000000 R09: 0000000000000000 [ 1293.227038] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a2d5668 [ 1293.227038] R13: ffff88003a2d5988 R14: ffff8800696a8ff8 R15: 0000000000000000 [ 1293.227038] FS: 00007f01930d9700(0000) GS:ffff88007ce00000(0000) knlGS:0000000000000000 [ 1293.227038] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1293.227038] CR2: 0000000000000018 CR3: 0000000065dfc000 CR4: 00000000000406e0 [ 1293.227038] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1293.227038] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 1293.227038] Process trinity (pid: 19609, threadinfo ffff880038c1c000, task ffff88002dc73000) [ 1293.227038] Stack: [ 1293.227038] ffff880038c1dd48 00000000fffffff4 ffff8800696aada0 ffff8800694f5580 [ 1293.227038] ffffffff8369f1e0 0000000000001500 ffff880038c1dd98 ffffffff822a716b [ 1293.227038] 0000000000000000 ffff8800696a8ff8 0000000000000015 ffff8800694f5580 [ 1293.227038] Call Trace: [ 1293.227038] [] ip_vs_app_inc_new+0xdb/0x180 [ 1293.227038] [] register_ip_vs_app_inc+0x48/0x70 [ 1293.227038] [] __ip_vs_ftp_init+0xba/0x140 [ 1293.227038] [] ops_init+0x80/0x90 [ 1293.227038] [] setup_net+0x5b/0xe0 [ 1293.227038] [] copy_net_ns+0x76/0x100 [ 1293.227038] [] create_new_namespaces+0xfb/0x190 [ 1293.227038] [] unshare_nsproxy_namespaces+0x61/0x80 [ 1293.227038] [] sys_unshare+0xff/0x290 [ 1293.227038] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 1293.227038] [] system_call_fastpath+0x16/0x1b [ 1293.227038] Code: 89 c7 e8 34 91 3b 00 89 de 66 c1 ee 04 31 de 83 e6 0f 48 83 c6 22 48 c1 e6 04 4a 8b 14 26 49 8d 34 34 48 8d 42 c0 48 39 d6 74 13 <66> 39 58 58 74 22 48 8b 48 40 48 8d 41 c0 48 39 ce 75 ed 49 8d [ 1293.227038] RIP [] tcp_register_app+0x60/0xb0 [ 1293.227038] RSP [ 1293.227038] CR2: 0000000000000018 [ 1293.379284] ---[ end trace 364ab40c7011a009 ]--- [ 1293.381182] Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Sasha Levin Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_proto.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index f843a883325..a62360e2903 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -59,9 +59,6 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) return 0; } -#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \ - defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \ - defined(CONFIG_IP_VS_PROTO_ESP) /* * register an ipvs protocols netns related data */ @@ -86,7 +83,6 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) return 0; } -#endif /* * unregister an ipvs protocol @@ -316,22 +312,35 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, */ int __net_init ip_vs_protocol_net_init(struct net *net) { + int i, ret; + static struct ip_vs_protocol *protos[] = { #ifdef CONFIG_IP_VS_PROTO_TCP - register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); + &ip_vs_protocol_tcp, #endif #ifdef CONFIG_IP_VS_PROTO_UDP - register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); + &ip_vs_protocol_udp, #endif #ifdef CONFIG_IP_VS_PROTO_SCTP - register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); + &ip_vs_protocol_sctp, #endif #ifdef CONFIG_IP_VS_PROTO_AH - register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); + &ip_vs_protocol_ah, #endif #ifdef CONFIG_IP_VS_PROTO_ESP - register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); + &ip_vs_protocol_esp, #endif + }; + + for (i = 0; i < ARRAY_SIZE(protos); i++) { + ret = register_ip_vs_proto_netns(net, protos[i]); + if (ret < 0) + goto cleanup; + } return 0; + +cleanup: + ip_vs_protocol_net_cleanup(net); + return ret; } void __net_exit ip_vs_protocol_net_cleanup(struct net *net) -- cgit v1.2.3 From 8f9b9a2fad47af27e14b037395e03cd8278d96d7 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 13 Apr 2012 18:08:43 +0300 Subject: ipvs: fix crash in ip_vs_control_net_cleanup on unload commit 14e405461e664b777e2a5636e10b2ebf36a686ec (2.6.39) ("Add __ip_vs_control_{init,cleanup}_sysctl()") introduced regression due to wrong __net_init for __ip_vs_control_cleanup_sysctl. This leads to crash when the ip_vs module is unloaded. Fix it by changing __net_init to __net_exit for the function that is already renamed to ip_vs_control_net_cleanup_sysctl. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b3afe189af6..376d2b12d58 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3680,7 +3680,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) +void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3692,7 +3692,7 @@ void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) #else int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { } +void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif -- cgit v1.2.3 From 62ad6fcd743792bf294f2a7ba26ab8f462065150 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 24 Apr 2012 18:15:41 +0000 Subject: udp_diag: implement idiag_get_info for udp/udplite to get queue information When we use netlink to monitor queue information for udp socket, idiag_rqueue and idiag_wqueue of inet_diag_msg are returned with 0. Keep consistent with netstat, just return back allocated rmem/wmem size. Signed-off-by: Shan Wei Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 2 +- net/ipv4/udp_diag.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8d25a1c557e..8f8db724bfa 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -141,7 +141,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto rtattr_failure; if (icsk == NULL) { - r->idiag_rqueue = r->idiag_wqueue = 0; + handler->idiag_get_info(sk, r, NULL); goto out; } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 8a949f19deb..a7f86a3cd50 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -146,9 +146,17 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return udp_dump_one(&udp_table, in_skb, nlh, req); } +static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + r->idiag_rqueue = sk_rmem_alloc_get(sk); + r->idiag_wqueue = sk_wmem_alloc_get(sk); +} + static const struct inet_diag_handler udp_diag_handler = { .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, + .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, }; @@ -167,6 +175,7 @@ static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr * static const struct inet_diag_handler udplite_diag_handler = { .dump = udplite_diag_dump, .dump_one = udplite_diag_dump_one, + .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, }; -- cgit v1.2.3 From 8d08d71ce59438a6ef06be5db07966e0c144b74e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 25 Apr 2012 00:29:59 +0300 Subject: ipvs: add check in ftp for initialized core Avoid crash when registering ip_vs_ftp after the IPVS core initialization for netns fails. Do this by checking for present core (net->ipvs). Signed-off-by: Julian Anastasov Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ftp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 538d74ee4f6..e39f693dd3e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -439,6 +439,8 @@ static int __net_init __ip_vs_ftp_init(struct net *net) struct ip_vs_app *app; struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); if (!app) return -ENOMEM; -- cgit v1.2.3 From 39f618b4fd95ae243d940ec64c961009c74e3333 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 25 Apr 2012 00:29:58 +0300 Subject: ipvs: reset ipvs pointer in netns Make sure net->ipvs is reset on netns cleanup or failed initialization. It is needed for IPVS applications to know that IPVS core is not loaded in netns. Signed-off-by: Julian Anastasov Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2555816e778..260b9ef8877 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1924,6 +1924,7 @@ protocol_fail: control_fail: ip_vs_estimator_net_cleanup(net); estimator_fail: + net->ipvs = NULL; return -ENOMEM; } @@ -1936,6 +1937,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net) ip_vs_control_net_cleanup(net); ip_vs_estimator_net_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); + net->ipvs = NULL; } static void __net_exit __ip_vs_dev_cleanup(struct net *net) -- cgit v1.2.3 From 0848e4043014631d792a66266d6d7d64a7f21da5 Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Wed, 25 Apr 2012 23:24:56 +0000 Subject: 6lowpan: fix segmentation fault caused by mlme request Add nescesary mlme callbacks to satisfy "iz list" request from user space. Due to 6lowpan device doesn't have its own phy, mlme implemented as a pipe to a real phy to which 6lowpan is attached. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 36851588536..f6b169439b5 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1044,6 +1044,24 @@ static void lowpan_dev_free(struct net_device *dev) free_netdev(dev); } +static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_phy(real_dev); +} + +static u16 lowpan_get_pan_id(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); +} + +static u16 lowpan_get_short_addr(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); +} + static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -1053,6 +1071,12 @@ static const struct net_device_ops lowpan_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; +static struct ieee802154_mlme_ops lowpan_mlme = { + .get_pan_id = lowpan_get_pan_id, + .get_phy = lowpan_get_phy, + .get_short_addr = lowpan_get_short_addr, +}; + static void lowpan_setup(struct net_device *dev) { pr_debug("(%s)\n", __func__); @@ -1070,6 +1094,7 @@ static void lowpan_setup(struct net_device *dev) dev->netdev_ops = &lowpan_netdev_ops; dev->header_ops = &lowpan_header_ops; + dev->ml_priv = &lowpan_mlme; dev->destructor = lowpan_dev_free; } -- cgit v1.2.3 From 8deff4af8745561efd2be34ee30cc57a6f0107c6 Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Wed, 25 Apr 2012 23:24:57 +0000 Subject: 6lowpan: clean up fragments list if module unloaded Clean all the pending fragments and relative timers if 6lowpan link is going to be deleted. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index f6b169439b5..b3021f76520 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1177,11 +1177,20 @@ static void lowpan_dellink(struct net_device *dev, struct list_head *head) { struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry; - struct lowpan_dev_record *tmp; + struct lowpan_dev_record *entry, *tmp; + struct lowpan_fragment *frame, *tframe; ASSERT_RTNL(); + spin_lock(&flist_lock); + list_for_each_entry_safe(frame, tframe, &lowpan_fragments, list) { + del_timer(&frame->timer); + list_del(&frame->list); + dev_kfree_skb(frame->skb); + kfree(frame); + } + spin_unlock(&flist_lock); + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { if (entry->ldev == dev) { -- cgit v1.2.3 From 768f7c7c121e80f458a9d013b2e8b169e5dfb1e5 Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Wed, 25 Apr 2012 23:24:58 +0000 Subject: 6lowpan: add missing spin_lock_init() Add missing spin_lock_init() for frames list lock. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index b3021f76520..840821b90bc 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1168,6 +1168,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, list_add_tail(&entry->list, &lowpan_devices); mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + spin_lock_init(&flist_lock); + register_netdevice(dev); return 0; -- cgit v1.2.3 From 651913ce9de2bbcedef608c5d6cf39c244248509 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 27 Apr 2012 11:29:37 -0400 Subject: tcp: clean up use of jiffies in tcp_rcv_rtt_measure() Clean up a reference to jiffies in tcp_rcv_rtt_measure() that should instead reference tcp_time_stamp. Since the result of the subtraction is passed into a function taking u32, this should not change any behavior (and indeed the generated assembly does not change on x86_64). However, it seems worth cleaning this up for consistency and clarity (and perhaps to avoid bugs if this is copied and pasted somewhere else). Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ff36406537..2a702e3a4ae 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -495,7 +495,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) goto new_measure; if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; - tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1); + tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1); new_measure: tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; -- cgit v1.2.3 From cde2e9a651b76d8db36ae94cd0febc82b637e5dd Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 27 Apr 2012 10:11:48 +0000 Subject: drop_monitor: fix sleeping in invalid context warning Eric Dumazet pointed out this warning in the drop_monitor protocol to me: [ 38.352571] BUG: sleeping function called from invalid context at kernel/mutex.c:85 [ 38.352576] in_atomic(): 1, irqs_disabled(): 0, pid: 4415, name: dropwatch [ 38.352580] Pid: 4415, comm: dropwatch Not tainted 3.4.0-rc2+ #71 [ 38.352582] Call Trace: [ 38.352592] [] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352599] [] __might_sleep+0xca/0xf0 [ 38.352606] [] mutex_lock+0x26/0x50 [ 38.352610] [] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352616] [] tracepoint_probe_register+0x29/0x90 [ 38.352621] [] set_all_monitor_traces+0x105/0x170 [ 38.352625] [] net_dm_cmd_trace+0x2a/0x40 [ 38.352630] [] genl_rcv_msg+0x21a/0x2b0 [ 38.352636] [] ? zone_statistics+0x99/0xc0 [ 38.352640] [] ? genl_rcv+0x30/0x30 [ 38.352645] [] netlink_rcv_skb+0xa9/0xd0 [ 38.352649] [] genl_rcv+0x20/0x30 [ 38.352653] [] netlink_unicast+0x1ae/0x1f0 [ 38.352658] [] netlink_sendmsg+0x2b6/0x310 [ 38.352663] [] sock_sendmsg+0x10f/0x130 [ 38.352668] [] ? move_addr_to_kernel+0x60/0xb0 [ 38.352673] [] ? verify_iovec+0x64/0xe0 [ 38.352677] [] __sys_sendmsg+0x386/0x390 [ 38.352682] [] ? handle_mm_fault+0x139/0x210 [ 38.352687] [] ? do_page_fault+0x1ec/0x4f0 [ 38.352693] [] ? set_next_entity+0x9d/0xb0 [ 38.352699] [] ? tty_ldisc_deref+0x9/0x10 [ 38.352703] [] ? pick_next_task_fair+0x63/0x140 [ 38.352708] [] sys_sendmsg+0x44/0x80 [ 38.352713] [] system_call_fastpath+0x16/0x1b It stems from holding a spinlock (trace_state_lock) while attempting to register or unregister tracepoint hooks, making in_atomic() true in this context, leading to the warning when the tracepoint calls might_sleep() while its taking a mutex. Since we only use the trace_state_lock to prevent trace protocol state races, as well as hardware stat list updates on an rcu write side, we can just convert the spinlock to a mutex to avoid this problem. Signed-off-by: Neil Horman Reported-by: Eric Dumazet CC: David Miller Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 5c3c81a609e..a221a5bbecf 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -42,7 +42,7 @@ static void send_dm_alert(struct work_struct *unused); * netlink alerts */ static int trace_state = TRACE_OFF; -static DEFINE_SPINLOCK(trace_state_lock); +static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { struct work_struct dm_alert_work; @@ -214,7 +214,7 @@ static int set_all_monitor_traces(int state) struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *temp; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); if (state == trace_state) { rc = -EAGAIN; @@ -253,7 +253,7 @@ static int set_all_monitor_traces(int state) rc = -EINPROGRESS; out_unlock: - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); return rc; } @@ -296,12 +296,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_add_rcu(&new_stat->list, &hw_stats_list); - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; case NETDEV_UNREGISTER: - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { new_stat->dev = NULL; @@ -312,7 +312,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, } } } - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; } out: -- cgit v1.2.3 From 3885ca785a3618593226687ced84f3f336dc3860 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 27 Apr 2012 10:11:49 +0000 Subject: drop_monitor: Make updating data->skb smp safe Eric Dumazet pointed out to me that the drop_monitor protocol has some holes in its smp protections. Specifically, its possible to replace data->skb while its being written. This patch corrects that by making data->skb an rcu protected variable. That will prevent it from being overwritten while a tracepoint is modifying it. Signed-off-by: Neil Horman Reported-by: Eric Dumazet CC: David Miller Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 70 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index a221a5bbecf..7592943513e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { struct work_struct dm_alert_work; - struct sk_buff *skb; + struct sk_buff __rcu *skb; atomic_t dm_hit_count; struct timer_list send_timer; }; @@ -73,35 +73,58 @@ static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); +static int initialized = 0; static void reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; + struct sk_buff *skb; + struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1); al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); al += sizeof(struct nlattr); - data->skb = genlmsg_new(al, GFP_KERNEL); - genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - atomic_set(&data->dm_hit_count, dm_hit_limit); + skb = genlmsg_new(al, GFP_KERNEL); + + if (skb) { + genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + msg = nla_data(nla); + memset(msg, 0, al); + } else if (initialized) + schedule_work_on(smp_processor_id(), &data->dm_alert_work); + + /* + * Don't need to lock this, since we are guaranteed to only + * run this on a single cpu at a time. + * Note also that we only update data->skb if the old and new skb + * pointers don't match. This ensures that we don't continually call + * synchornize_rcu if we repeatedly fail to alloc a new netlink message. + */ + if (skb != oskb) { + rcu_assign_pointer(data->skb, skb); + + synchronize_rcu(); + + atomic_set(&data->dm_hit_count, dm_hit_limit); + } + } static void send_dm_alert(struct work_struct *unused) { struct sk_buff *skb; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); /* * Grab the skb we're about to send */ - skb = data->skb; + skb = rcu_dereference_protected(data->skb, 1); /* * Replace it with a new one @@ -111,8 +134,10 @@ static void send_dm_alert(struct work_struct *unused) /* * Ship it! */ - genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + if (skb) + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + put_cpu_var(dm_cpu_data); } /* @@ -123,9 +148,11 @@ static void send_dm_alert(struct work_struct *unused) */ static void sched_send_work(unsigned long unused) { - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + + schedule_work_on(smp_processor_id(), &data->dm_alert_work); - schedule_work(&data->dm_alert_work); + put_cpu_var(dm_cpu_data); } static void trace_drop_common(struct sk_buff *skb, void *location) @@ -134,9 +161,16 @@ static void trace_drop_common(struct sk_buff *skb, void *location) struct nlmsghdr *nlh; struct nlattr *nla; int i; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct sk_buff *dskb; + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + rcu_read_lock(); + dskb = rcu_dereference(data->skb); + + if (!dskb) + goto out; + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { /* * we're already at zero, discard this hit @@ -144,7 +178,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) goto out; } - nlh = (struct nlmsghdr *)data->skb->data; + nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); for (i = 0; i < msg->entries; i++) { @@ -158,7 +192,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) /* * We need to create a new entry */ - __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); msg->points[msg->entries].count = 1; @@ -170,6 +204,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location) } out: + rcu_read_unlock(); + put_cpu_var(dm_cpu_data); return; } @@ -375,6 +411,8 @@ static int __init init_net_drop_monitor(void) data->send_timer.function = sched_send_work; } + initialized = 1; + goto out; out_unreg: -- cgit v1.2.3 From 4b984cd50bc1b6d492175cd77bfabb78e76ffa67 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 26 Apr 2012 09:45:34 +0200 Subject: ipvs: null check of net->ipvs in lblc(r) shedulers Avoid crash when registering shedulers after the IPVS core initialization for netns fails. Do this by checking for present core (net->ipvs). Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblc.c | 3 +++ net/netfilter/ipvs/ip_vs_lblcr.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 0f16283fd05..caa43704e55 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -551,6 +551,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; + if (!net_eq(net, &init_net)) { ipvs->lblc_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index eec797f8cce..548bf37aa29 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -745,6 +745,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; + if (!net_eq(net, &init_net)) { ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), -- cgit v1.2.3 From 582b8e3eadaec77788c1aa188081a8d5059c42a6 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 26 Apr 2012 09:45:35 +0200 Subject: ipvs: take care of return value from protocol init_netns ip_vs_create_timeout_table() can return NULL All functions protocol init_netns is affected of this patch. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_proto.c | 11 +++++++++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 5 ++++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 ++++- net/netfilter/ipvs/ip_vs_proto_udp.c | 5 ++++- 5 files changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2bdee51ba30..6d90dda2ddb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -393,7 +393,7 @@ struct ip_vs_protocol { void (*exit)(struct ip_vs_protocol *pp); - void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); + int (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index a62360e2903..ed835e67a07 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -78,8 +78,15 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) ipvs->proto_data_table[hash] = pd; atomic_set(&pd->appcnt, 0); /* Init app counter */ - if (pp->init_netns != NULL) - pp->init_netns(net, pd); + if (pp->init_netns != NULL) { + int ret = pp->init_netns(net, pd); + if (ret) { + /* unlink an free proto data */ + ipvs->proto_data_table[hash] = pd->next; + kfree(pd); + return ret; + } + } return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1fbf7a2816f..9f3fb751c49 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1090,7 +1090,7 @@ out: * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1098,6 +1098,9 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index ef8641f7af8..cd609cc6272 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -677,7 +677,7 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -685,7 +685,10 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; pd->tcp_state_table = tcp_states; + return 0; } static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index f4b7262896b..2fedb2dcb3d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -467,7 +467,7 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; } -static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -475,6 +475,9 @@ static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) -- cgit v1.2.3 From 8537de8a7ab6681cc72fb0411ab1ba7fdba62dd0 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 26 Apr 2012 07:47:44 +0200 Subject: ipvs: kernel oops - do_ip_vs_get_ctl Change order of init so netns init is ready when register ioctl and netlink. Ver2 Whitespace fixes and __init added. Reported-by: "Ryan O'Hara" Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 ++ net/netfilter/ipvs/ip_vs_core.c | 9 +++++++ net/netfilter/ipvs/ip_vs_ctl.c | 52 ++++++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6d90dda2ddb..72522f08737 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1203,6 +1203,8 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); +extern int ip_vs_register_nl_ioctl(void); +extern void ip_vs_unregister_nl_ioctl(void); extern int ip_vs_control_init(void); extern void ip_vs_control_cleanup(void); extern struct ip_vs_dest * diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 260b9ef8877..00bdb1d9d69 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1995,10 +1995,18 @@ static int __init ip_vs_init(void) goto cleanup_dev; } + ret = ip_vs_register_nl_ioctl(); + if (ret < 0) { + pr_err("can't register netlink/ioctl.\n"); + goto cleanup_hooks; + } + pr_info("ipvs loaded.\n"); return ret; +cleanup_hooks: + nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: @@ -2014,6 +2022,7 @@ exit: static void __exit ip_vs_cleanup(void) { + ip_vs_unregister_nl_ioctl(); nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 376d2b12d58..f5589987fc8 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3750,21 +3750,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -int __init ip_vs_control_init(void) +int __init ip_vs_register_nl_ioctl(void) { - int idx; int ret; - EnterFunction(2); - - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); - } - - smp_wmb(); /* Do we really need it now ? */ - ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); @@ -3776,28 +3765,47 @@ int __init ip_vs_control_init(void) pr_err("cannot register Generic Netlink interface.\n"); goto err_genl; } - - ret = register_netdevice_notifier(&ip_vs_dst_notifier); - if (ret < 0) - goto err_notf; - - LeaveFunction(2); return 0; -err_notf: - ip_vs_genl_unregister(); err_genl: nf_unregister_sockopt(&ip_vs_sockopts); err_sock: return ret; } +void ip_vs_unregister_nl_ioctl(void) +{ + ip_vs_genl_unregister(); + nf_unregister_sockopt(&ip_vs_sockopts); +} + +int __init ip_vs_control_init(void) +{ + int idx; + int ret; + + EnterFunction(2); + + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + INIT_LIST_HEAD(&ip_vs_svc_table[idx]); + INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + } + + smp_wmb(); /* Do we really need it now ? */ + + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + return ret; + + LeaveFunction(2); + return 0; +} + void ip_vs_control_cleanup(void) { EnterFunction(2); unregister_netdevice_notifier(&ip_vs_dst_notifier); - ip_vs_genl_unregister(); - nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } -- cgit v1.2.3 From 6cf51852486af3d79f57bf46d00209a14244dbaa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 Apr 2012 02:00:50 +0200 Subject: netfilter: xt_CT: fix wrong checking in the timeout assignment path The current checking always succeeded. We have to check the first character of the string to check that it's empty, thus, skipping the timeout path. This fixes the use of the CT target without the timeout option. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 59530e93fa5..3746d8b9a47 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -227,7 +227,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT - if (info->timeout) { + if (info->timeout[0]) { typeof(nf_ct_timeout_find_get_hook) timeout_find_get; struct nf_conn_timeout *timeout_ext; -- cgit v1.2.3 From 1cebce36d660c83bd1353e41f3e66abd4686f215 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 30 Apr 2012 06:00:18 +0000 Subject: tcp: fix infinite cwnd in tcp_complete_cwr() When the cwnd reduction is done, ssthresh may be infinite if TCP enters CWR via ECN or F-RTO. If cwnd is not undone, i.e., undo_marker is set, tcp_complete_cwr() falsely set cwnd to the infinite ssthresh value. The correct operation is to keep cwnd intact because it has been updated in ECN or F-RTO. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2a702e3a4ae..d99efd7dfb6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2868,11 +2868,14 @@ static inline void tcp_complete_cwr(struct sock *sk) /* Do not moderate cwnd if it's already undone in cwr or recovery. */ if (tp->undo_marker) { - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - else /* PRR */ + tp->snd_cwnd_stamp = tcp_time_stamp; + } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) { + /* PRR algorithm. */ tp->snd_cwnd = tp->snd_ssthresh; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_time_stamp; + } } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } -- cgit v1.2.3 From 66f2c99af3d6f2d0aa1120884cf1c60613ef61c0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 29 Apr 2012 15:44:16 +0200 Subject: mac80211: fix AP mode EAP tx for VLAN stations EAP frames for stations in an AP VLAN are sent on the main AP interface to avoid race conditions wrt. moving stations. For that to work properly, sta_info_get_bss must be used instead of sta_info_get when sending EAP packets. Previously this was only done for cooked monitor injected packets, so this patch adds a check for tx->skb->protocol to the same place. Signed-off-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 782a60198df..e76facc69e9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1158,7 +1158,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) return TX_DROP; - } else if (info->flags & IEEE80211_TX_CTL_INJECTED) { + } else if (info->flags & IEEE80211_TX_CTL_INJECTED || + tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } if (!tx->sta) -- cgit v1.2.3 From 116a0fc31c6c9b8fc821be5a96e5bf0b43260131 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Apr 2012 09:08:22 +0000 Subject: netem: fix possible skb leak skb_checksum_help(skb) can return an error, we must free skb in this case. qdisc_drop(skb, sch) can also be feeded with a NULL skb (if skb_unshare() failed), so lets use this generic helper. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5da548fa7ae..ebd22966f74 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -408,10 +408,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) { - sch->qstats.drops++; - return NET_XMIT_DROP; - } + skb_checksum_help(skb))) + return qdisc_drop(skb, sch); skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } -- cgit v1.2.3 From 4fdcfa12843bca38d0c9deff70c8720e4e8f515f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 1 May 2012 08:18:02 +0000 Subject: drop_monitor: prevent init path from scheduling on the wrong cpu I just noticed after some recent updates, that the init path for the drop monitor protocol has a minor error. drop monitor maintains a per cpu structure, that gets initalized from a single cpu. Normally this is fine, as the protocol isn't in use yet, but I recently made a change that causes a failed skb allocation to reschedule itself . Given the current code, the implication is that this workqueue reschedule will take place on the wrong cpu. If drop monitor is used early during the boot process, its possible that two cpus will access a single per-cpu structure in parallel, possibly leading to data corruption. This patch fixes the situation, by storing the cpu number that a given instance of this per-cpu data should be accessed from. In the case of a need for a reschedule, the cpu stored in the struct is assigned the rescheule, rather than the currently executing cpu Tested successfully by myself. Signed-off-by: Neil Horman CC: David Miller Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 7592943513e..a7cad741df0 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -49,6 +49,7 @@ struct per_cpu_dm_data { struct sk_buff __rcu *skb; atomic_t dm_hit_count; struct timer_list send_timer; + int cpu; }; struct dm_hw_stat_delta { @@ -73,7 +74,6 @@ static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); -static int initialized = 0; static void reset_per_cpu_data(struct per_cpu_dm_data *data) { @@ -96,8 +96,8 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data) sizeof(struct net_dm_alert_msg)); msg = nla_data(nla); memset(msg, 0, al); - } else if (initialized) - schedule_work_on(smp_processor_id(), &data->dm_alert_work); + } else + schedule_work_on(data->cpu, &data->dm_alert_work); /* * Don't need to lock this, since we are guaranteed to only @@ -121,6 +121,8 @@ static void send_dm_alert(struct work_struct *unused) struct sk_buff *skb; struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + WARN_ON_ONCE(data->cpu != smp_processor_id()); + /* * Grab the skb we're about to send */ @@ -404,14 +406,14 @@ static int __init init_net_drop_monitor(void) for_each_present_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); - reset_per_cpu_data(data); + data->cpu = cpu; INIT_WORK(&data->dm_alert_work, send_dm_alert); init_timer(&data->send_timer); data->send_timer.data = cpu; data->send_timer.function = sched_send_work; + reset_per_cpu_data(data); } - initialized = 1; goto out; -- cgit v1.2.3 From 84768edbb2721637620b2d84501bb0d5aed603f1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 2 May 2012 03:58:43 +0000 Subject: net: l2tp: unlock socket lock before returning from l2tp_ip_sendmsg l2tp_ip_sendmsg could return without releasing socket lock, making it all the way to userspace, and generating the following warning: [ 130.891594] ================================================ [ 130.894569] [ BUG: lock held when returning to user space! ] [ 130.897257] 3.4.0-rc5-next-20120501-sasha #104 Tainted: G W [ 130.900336] ------------------------------------------------ [ 130.902996] trinity/8384 is leaving the kernel with locks still held! [ 130.906106] 1 lock held by trinity/8384: [ 130.907924] #0: (sk_lock-AF_INET){+.+.+.}, at: [] l2tp_ip_sendmsg+0x2f/0x550 Introduced by commit 2f16270 ("l2tp: Fix locking in l2tp_ip.c"). Signed-off-by: Sasha Levin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 585d93ecee2..6274f0be82b 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -442,8 +442,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m daddr = lip->l2tp_addr.s_addr; } else { + rc = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; + goto out; daddr = inet->inet_daddr; connected = 1; -- cgit v1.2.3 From b49960a05e32121d29316cfdf653894b88ac9190 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 May 2012 02:28:41 +0000 Subject: tcp: change tcp_adv_win_scale and tcp_rmem[2] tcp_adv_win_scale default value is 2, meaning we expect a good citizen skb to have skb->len / skb->truesize ratio of 75% (3/4) In 2.6 kernels we (mis)accounted for typical MSS=1460 frame : 1536 + 64 + 256 = 1856 'estimated truesize', and 1856 * 3/4 = 1392. So these skbs were considered as not bloated. With recent truesize fixes, a typical MSS=1460 frame truesize is now the more precise : 2048 + 256 = 2304. But 2304 * 3/4 = 1728. So these skb are not good citizen anymore, because 1460 < 1728 (GRO can escape this problem because it build skbs with a too low truesize.) This also means tcp advertises a too optimistic window for a given allocated rcvspace : When receiving frames, sk_rmem_alloc can hit sk_rcvbuf limit and we call tcp_prune_queue()/tcp_collapse() too often, especially when application is slow to drain its receive queue or in case of losses (netperf is fast, scp is slow). This is a major latency source. We should adjust the len/truesize ratio to 50% instead of 75% This patch : 1) changes tcp_adv_win_scale default to 1 instead of 2 2) increase tcp_rmem[2] limit from 4MB to 6MB to take into account better truesize tracking and to allow autotuning tcp receive window to reach same value than before. Note that same amount of kernel memory is consumed compared to 2.6 kernels. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 4 ++-- net/ipv4/tcp.c | 9 +++++---- net/ipv4/tcp_input.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index bd80ba5847d..1619a8c8087 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -147,7 +147,7 @@ tcp_adv_win_scale - INTEGER (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale), if it is <= 0. Possible values are [-31, 31], inclusive. - Default: 2 + Default: 1 tcp_allowed_congestion_control - STRING Show/set the congestion control choices available to non-privileged @@ -410,7 +410,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables automatic tuning of that socket's receive buffer size, in which case this value is ignored. - Default: between 87380B and 4MB, depending on RAM size. + Default: between 87380B and 6MB, depending on RAM size. tcp_sack - BOOLEAN Enable select acknowledgments (SACKS). diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8bb6adeb62c..1272a88c2a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3243,7 +3243,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long limit; - int max_share, cnt; + int max_rshare, max_wshare, cnt; unsigned int i; unsigned long jiffy = jiffies; @@ -3303,15 +3303,16 @@ void __init tcp_init(void) tcp_init_mem(&init_net); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); - max_share = min(4UL*1024*1024, limit); + max_wshare = min(4UL*1024*1024, limit); + max_rshare = min(6UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_share); + sysctl_tcp_wmem[2] = max(64*1024, max_wshare); sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; - sysctl_tcp_rmem[2] = max(87380, max_share); + sysctl_tcp_rmem[2] = max(87380, max_rshare); pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d99efd7dfb6..257b61789ee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,7 @@ int sysctl_tcp_ecn __read_mostly = 2; EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; -int sysctl_tcp_adv_win_scale __read_mostly = 2; +int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); int sysctl_tcp_stdurg __read_mostly; -- cgit v1.2.3