From 745c0ce35f904aeff8e1ea325c259a14a00ff1b7 Mon Sep 17 00:00:00 2001
From: Vishal Agarwal <vishal.agarwal@stericsson.com>
Date: Fri, 13 Apr 2012 17:43:22 +0530
Subject: Bluetooth: hci_persistent_key should return bool

This patch changes the return type of function hci_persistent_key
from int to bool because it makes more sense to return information
whether a key is persistent or not as a bool.

Signed-off-by: Vishal Agarwal <vishal.agarwal@stericsson.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/hci_core.c | 21 +++++++++++----------
 net/bluetooth/mgmt.c     |  2 +-
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2054c1321c8..c2251e4c3b7 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1216,40 +1216,40 @@ struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
 	return NULL;
 }
 
-static int hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn,
+static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn,
 						u8 key_type, u8 old_key_type)
 {
 	/* Legacy key */
 	if (key_type < 0x03)
-		return 1;
+		return true;
 
 	/* Debug keys are insecure so don't store them persistently */
 	if (key_type == HCI_LK_DEBUG_COMBINATION)
-		return 0;
+		return false;
 
 	/* Changed combination key and there's no previous one */
 	if (key_type == HCI_LK_CHANGED_COMBINATION && old_key_type == 0xff)
-		return 0;
+		return false;
 
 	/* Security mode 3 case */
 	if (!conn)
-		return 1;
+		return true;
 
 	/* Neither local nor remote side had no-bonding as requirement */
 	if (conn->auth_type > 0x01 && conn->remote_auth > 0x01)
-		return 1;
+		return true;
 
 	/* Local side had dedicated bonding as requirement */
 	if (conn->auth_type == 0x02 || conn->auth_type == 0x03)
-		return 1;
+		return true;
 
 	/* Remote side had dedicated bonding as requirement */
 	if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03)
-		return 1;
+		return true;
 
 	/* If none of the above criteria match, then don't store the key
 	 * persistently */
-	return 0;
+	return false;
 }
 
 struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, u8 rand[8])
@@ -1286,7 +1286,8 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
 		     bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len)
 {
 	struct link_key *key, *old_key;
-	u8 old_key_type, persistent;
+	u8 old_key_type;
+	bool persistent;
 
 	old_key = hci_find_link_key(hdev, bdaddr);
 	if (old_key) {
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 4ef275c6967..4bb03b11112 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2884,7 +2884,7 @@ int mgmt_write_scan_failed(struct hci_dev *hdev, u8 scan, u8 status)
 	return 0;
 }
 
-int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, u8 persistent)
+int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent)
 {
 	struct mgmt_ev_new_link_key ev;
 
-- 
cgit v1.2.3


From 6ec5bcadc21e13ceba8c144e4731eccac01d04f7 Mon Sep 17 00:00:00 2001
From: Vishal Agarwal <vishal.agarwal@stericsson.com>
Date: Mon, 16 Apr 2012 14:44:44 +0530
Subject: Bluetooth: Temporary keys should be retained during connection

If a key is non persistent then it should not be used in future
connections but it should be kept for current connection. And it
should be removed when connecion is removed.

Signed-off-by: Vishal Agarwal <vishal.agarwal@stericsson.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/hci_core.c         | 6 ++----
 net/bluetooth/hci_event.c        | 2 ++
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index f8577c16fcf..db1c5df4522 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -314,6 +314,7 @@ struct hci_conn {
 
 	__u8		remote_cap;
 	__u8		remote_auth;
+	bool		flush_key;
 
 	unsigned int	sent;
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c2251e4c3b7..a7607e4be34 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1330,10 +1330,8 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
 
 	mgmt_new_link_key(hdev, key, persistent);
 
-	if (!persistent) {
-		list_del(&key->list);
-		kfree(key);
-	}
+	if (conn)
+		conn->flush_key = !persistent;
 
 	return 0;
 }
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index badb7851d11..6a72eaea70e 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1902,6 +1902,8 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
 	}
 
 	if (ev->status == 0) {
+		if (conn->type == ACL_LINK && conn->flush_key)
+			hci_remove_link_key(hdev, &conn->dst);
 		hci_proto_disconn_cfm(conn, ev->reason);
 		hci_conn_del(conn);
 	}
-- 
cgit v1.2.3


From a881e963c7fe1f226e991ee9bbe8907acda93294 Mon Sep 17 00:00:00 2001
From: "Peter Huang (Peng)" <peter.huangpeng@huawei.com>
Date: Thu, 19 Apr 2012 20:12:51 +0000
Subject: set fake_rtable's dst to NULL to avoid kernel Oops

bridge: set fake_rtable's dst to NULL to avoid kernel Oops

when bridge is deleted before tap/vif device's delete, kernel may
encounter an oops because of NULL reference to fake_rtable's dst.
Set fake_rtable's dst to NULL before sending packets out can solve
this problem.

v4 reformat, change br_drop_fake_rtable(skb) to {}

v3 enrich commit header

v2 introducing new flag DST_FAKE_RTABLE to dst_entry struct.

[ Use "do { } while (0)" for nop br_drop_fake_rtable()
  implementation -DaveM ]

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Peter Huang <peter.huangpeng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge.h | 9 +++++++++
 include/net/dst.h                | 1 +
 net/bridge/br_forward.c          | 1 +
 net/bridge/br_netfilter.c        | 8 ++------
 4 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 0ddd161f3b0..31d2844e657 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -104,9 +104,18 @@ struct bridge_skb_cb {
 	} daddr;
 };
 
+static inline void br_drop_fake_rtable(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (dst && (dst->flags & DST_FAKE_RTABLE))
+		skb_dst_drop(skb);
+}
+
 #else
 #define nf_bridge_maybe_copy_header(skb)	(0)
 #define nf_bridge_pad(skb)			(0)
+#define br_drop_fake_rtable(skb)	        do { } while (0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
 #endif /* __KERNEL__ */
diff --git a/include/net/dst.h b/include/net/dst.h
index ff4da42fcfc..bed833d9796 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -59,6 +59,7 @@ struct dst_entry {
 #define DST_NOCACHE		0x0010
 #define DST_NOCOUNT		0x0020
 #define DST_NOPEER		0x0040
+#define DST_FAKE_RTABLE		0x0080
 
 	short			error;
 	short			obsolete;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 61f65344e71..a2098e3de50 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -47,6 +47,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 		kfree_skb(skb);
 	} else {
 		skb_push(skb, ETH_HLEN);
+		br_drop_fake_rtable(skb);
 		dev_queue_xmit(skb);
 	}
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index dec4f381713..d7f49b63ab0 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -156,7 +156,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 	rt->dst.dev = br->dev;
 	rt->dst.path = &rt->dst;
 	dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
-	rt->dst.flags	= DST_NOXFRM | DST_NOPEER;
+	rt->dst.flags	= DST_NOXFRM | DST_NOPEER | DST_FAKE_RTABLE;
 	rt->dst.ops = &fake_dst_ops;
 }
 
@@ -694,11 +694,7 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
 {
-	struct rtable *rt = skb_rtable(skb);
-
-	if (rt && rt == bridge_parent_rtable(in))
-		skb_dst_drop(skb);
-
+	br_drop_fake_rtable(skb);
 	return NF_ACCEPT;
 }
 
-- 
cgit v1.2.3


From 16cde9931bcd8d8ca968ef1cded02684ea040374 Mon Sep 17 00:00:00 2001
From: Szymon Janc <szymon.janc@tieto.com>
Date: Fri, 13 Apr 2012 12:32:42 +0200
Subject: Bluetooth: Fix missing break in hci_cmd_complete_evt

Command complete event for HCI_OP_USER_PASSKEY_NEG_REPLY would result
in calling handler function also for HCI_OP_LE_SET_SCAN_PARAM. This
could result in undefined behaviour.

Signed-off-by: Szymon Janc <szymon.janc@tieto.com>
Signed-off-by: Gustavo Padovan <gustavo@padovan.org>
---
 net/bluetooth/hci_event.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6a72eaea70e..7f87a70b861 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2314,6 +2314,7 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk
 
 	case HCI_OP_USER_PASSKEY_NEG_REPLY:
 		hci_cc_user_passkey_neg_reply(hdev, skb);
+		break;
 
 	case HCI_OP_LE_SET_SCAN_PARAM:
 		hci_cc_le_set_scan_param(hdev, skb);
-- 
cgit v1.2.3


From afa762f6871a8cb05fbef5d0f83fac14304aa816 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Mon, 23 Apr 2012 14:45:15 +0300
Subject: mac80211: call ieee80211_mgd_stop() on interface stop

ieee80211_mgd_teardown() is called on netdev removal, which
occurs after the vif was already removed from the low-level
driver, resulting in the following warning:

[ 4809.014734] ------------[ cut here ]------------
[ 4809.019861] WARNING: at net/mac80211/driver-ops.h:12 ieee80211_bss_info_change_notify+0x200/0x2c8 [mac80211]()
[ 4809.030388] wlan0:  Failed check-sdata-in-driver check, flags: 0x4
[ 4809.036862] Modules linked in: wlcore_sdio(-) wl12xx wlcore mac80211 cfg80211 [last unloaded: cfg80211]
[ 4809.046849] [<c001bd4c>] (unwind_backtrace+0x0/0x12c)
[ 4809.055937] [<c047cf1c>] (dump_stack+0x20/0x24)
[ 4809.065385] [<c003e334>] (warn_slowpath_common+0x5c/0x74)
[ 4809.075589] [<c003e408>] (warn_slowpath_fmt+0x40/0x48)
[ 4809.088291] [<bf033630>] (ieee80211_bss_info_change_notify+0x200/0x2c8 [mac80211])
[ 4809.102844] [<bf067f84>] (ieee80211_destroy_auth_data+0x80/0xa4 [mac80211])
[ 4809.116276] [<bf068004>] (ieee80211_mgd_teardown+0x5c/0x74 [mac80211])
[ 4809.129331] [<bf043f18>] (ieee80211_teardown_sdata+0xb0/0xd8 [mac80211])
[ 4809.141595] [<c03b5e58>] (rollback_registered_many+0x228/0x2f0)
[ 4809.153056] [<c03b5f48>] (unregister_netdevice_many+0x28/0x50)
[ 4809.165696] [<bf041ea8>] (ieee80211_remove_interfaces+0xb4/0xdc [mac80211])
[ 4809.179151] [<bf032174>] (ieee80211_unregister_hw+0x50/0xf0 [mac80211])
[ 4809.191043] [<bf0bebb4>] (wlcore_remove+0x5c/0x7c [wlcore])
[ 4809.201491] [<c02c6918>] (platform_drv_remove+0x24/0x28)
[ 4809.212029] [<c02c4d50>] (__device_release_driver+0x8c/0xcc)
[ 4809.222738] [<c02c4e84>] (device_release_driver+0x30/0x3c)
[ 4809.233099] [<c02c4258>] (bus_remove_device+0x10c/0x128)
[ 4809.242620] [<c02c26f8>] (device_del+0x11c/0x17c)
[ 4809.252150] [<c02c6de0>] (platform_device_del+0x28/0x68)
[ 4809.263051] [<bf0df49c>] (wl1271_remove+0x3c/0x50 [wlcore_sdio])
[ 4809.273590] [<c03806b0>] (sdio_bus_remove+0x48/0xf8)
[ 4809.283754] [<c02c4d50>] (__device_release_driver+0x8c/0xcc)
[ 4809.293729] [<c02c4e2c>] (driver_detach+0x9c/0xc4)
[ 4809.303163] [<c02c3d7c>] (bus_remove_driver+0xc4/0xf4)
[ 4809.312973] [<c02c5a98>] (driver_unregister+0x70/0x7c)
[ 4809.323220] [<c03809c4>] (sdio_unregister_driver+0x24/0x2c)
[ 4809.334213] [<bf0df458>] (wl1271_exit+0x14/0x1c [wlcore_sdio])
[ 4809.344930] [<c009b1a4>] (sys_delete_module+0x228/0x2a8)
[ 4809.354734] ---[ end trace 515290ccf5feb522 ]---

Rename ieee80211_mgd_teardown() to ieee80211_mgd_stop(),
and call it on ieee80211_do_stop().

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 2 +-
 net/mac80211/iface.c       | 4 ++--
 net/mac80211/mlme.c        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d9798a307f2..db8fae51714 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1210,7 +1210,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 				  struct sk_buff *skb);
 void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
-void ieee80211_mgd_teardown(struct ieee80211_sub_if_data *sdata);
+void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata);
 
 /* IBSS code */
 void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 401c01f0731..c20051b7ffc 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -486,6 +486,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		/* free all potentially still buffered bcast frames */
 		local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps_bc_buf);
 		skb_queue_purge(&sdata->u.ap.ps_bc_buf);
+	} else if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+		ieee80211_mgd_stop(sdata);
 	}
 
 	if (going_down)
@@ -644,8 +646,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		mesh_rmc_free(sdata);
-	else if (sdata->vif.type == NL80211_IFTYPE_STATION)
-		ieee80211_mgd_teardown(sdata);
 
 	flushed = sta_info_flush(local, sdata);
 	WARN_ON(flushed);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f76da5b3f5c..20c680bfc3a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3497,7 +3497,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
-void ieee80211_mgd_teardown(struct ieee80211_sub_if_data *sdata)
+void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
-- 
cgit v1.2.3


From 7118c07a844d367560ee91adb2071bde2fabcdbf Mon Sep 17 00:00:00 2001
From: Sasha Levin <levinsasha928@gmail.com>
Date: Sat, 14 Apr 2012 12:37:46 -0400
Subject: ipvs: Verify that IP_VS protocol has been registered

The registration of a protocol might fail, there were no checks
and all registrations were assumed to be correct. This lead to
NULL ptr dereferences when apps tried registering.

For example:

[ 1293.226051] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[ 1293.227038] IP: [<ffffffff822aacb0>] tcp_register_app+0x60/0xb0
[ 1293.227038] PGD 391de067 PUD 6c20b067 PMD 0
[ 1293.227038] Oops: 0000 [#1] PREEMPT SMP
[ 1293.227038] CPU 1
[ 1293.227038] Pid: 19609, comm: trinity Tainted: G        W    3.4.0-rc1-next-20120405-sasha-dirty #57
[ 1293.227038] RIP: 0010:[<ffffffff822aacb0>]  [<ffffffff822aacb0>] tcp_register_app+0x60/0xb0
[ 1293.227038] RSP: 0018:ffff880038c1dd18  EFLAGS: 00010286
[ 1293.227038] RAX: ffffffffffffffc0 RBX: 0000000000001500 RCX: 0000000000010000
[ 1293.227038] RDX: 0000000000000000 RSI: ffff88003a2d5888 RDI: 0000000000000282
[ 1293.227038] RBP: ffff880038c1dd48 R08: 0000000000000000 R09: 0000000000000000
[ 1293.227038] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a2d5668
[ 1293.227038] R13: ffff88003a2d5988 R14: ffff8800696a8ff8 R15: 0000000000000000
[ 1293.227038] FS:  00007f01930d9700(0000) GS:ffff88007ce00000(0000) knlGS:0000000000000000
[ 1293.227038] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1293.227038] CR2: 0000000000000018 CR3: 0000000065dfc000 CR4: 00000000000406e0
[ 1293.227038] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1293.227038] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1293.227038] Process trinity (pid: 19609, threadinfo ffff880038c1c000, task ffff88002dc73000)
[ 1293.227038] Stack:
[ 1293.227038]  ffff880038c1dd48 00000000fffffff4 ffff8800696aada0 ffff8800694f5580
[ 1293.227038]  ffffffff8369f1e0 0000000000001500 ffff880038c1dd98 ffffffff822a716b
[ 1293.227038]  0000000000000000 ffff8800696a8ff8 0000000000000015 ffff8800694f5580
[ 1293.227038] Call Trace:
[ 1293.227038]  [<ffffffff822a716b>] ip_vs_app_inc_new+0xdb/0x180
[ 1293.227038]  [<ffffffff822a7258>] register_ip_vs_app_inc+0x48/0x70
[ 1293.227038]  [<ffffffff822b2fea>] __ip_vs_ftp_init+0xba/0x140
[ 1293.227038]  [<ffffffff821c9060>] ops_init+0x80/0x90
[ 1293.227038]  [<ffffffff821c90cb>] setup_net+0x5b/0xe0
[ 1293.227038]  [<ffffffff821c9416>] copy_net_ns+0x76/0x100
[ 1293.227038]  [<ffffffff810dc92b>] create_new_namespaces+0xfb/0x190
[ 1293.227038]  [<ffffffff810dca21>] unshare_nsproxy_namespaces+0x61/0x80
[ 1293.227038]  [<ffffffff810afd1f>] sys_unshare+0xff/0x290
[ 1293.227038]  [<ffffffff8187622e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[ 1293.227038]  [<ffffffff82665539>] system_call_fastpath+0x16/0x1b
[ 1293.227038] Code: 89 c7 e8 34 91 3b 00 89 de 66 c1 ee 04 31 de 83 e6 0f 48 83 c6 22 48 c1 e6 04 4a 8b 14 26 49 8d 34 34 48 8d 42 c0 48 39 d6 74 13 <66> 39 58 58 74 22 48 8b 48 40 48 8d 41 c0 48 39 ce 75 ed 49 8d
[ 1293.227038] RIP  [<ffffffff822aacb0>] tcp_register_app+0x60/0xb0
[ 1293.227038]  RSP <ffff880038c1dd18>
[ 1293.227038] CR2: 0000000000000018
[ 1293.379284] ---[ end trace 364ab40c7011a009 ]---
[ 1293.381182] Kernel panic - not syncing: Fatal exception in interrupt

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_proto.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index f843a883325..a62360e2903 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -59,9 +59,6 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return 0;
 }
 
-#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \
-    defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \
-    defined(CONFIG_IP_VS_PROTO_ESP)
 /*
  *	register an ipvs protocols netns related data
  */
@@ -86,7 +83,6 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
 
 	return 0;
 }
-#endif
 
 /*
  *	unregister an ipvs protocol
@@ -316,22 +312,35 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
  */
 int __net_init ip_vs_protocol_net_init(struct net *net)
 {
+	int i, ret;
+	static struct ip_vs_protocol *protos[] = {
 #ifdef CONFIG_IP_VS_PROTO_TCP
-	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+        &ip_vs_protocol_tcp,
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
-	register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+	&ip_vs_protocol_udp,
 #endif
 #ifdef CONFIG_IP_VS_PROTO_SCTP
-	register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+	&ip_vs_protocol_sctp,
 #endif
 #ifdef CONFIG_IP_VS_PROTO_AH
-	register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+	&ip_vs_protocol_ah,
 #endif
 #ifdef CONFIG_IP_VS_PROTO_ESP
-	register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+	&ip_vs_protocol_esp,
 #endif
+	};
+
+	for (i = 0; i < ARRAY_SIZE(protos); i++) {
+		ret = register_ip_vs_proto_netns(net, protos[i]);
+		if (ret < 0)
+			goto cleanup;
+	}
 	return 0;
+
+cleanup:
+	ip_vs_protocol_net_cleanup(net);
+	return ret;
 }
 
 void __net_exit ip_vs_protocol_net_cleanup(struct net *net)
-- 
cgit v1.2.3


From 8f9b9a2fad47af27e14b037395e03cd8278d96d7 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 13 Apr 2012 18:08:43 +0300
Subject: ipvs: fix crash in ip_vs_control_net_cleanup on unload

	commit 14e405461e664b777e2a5636e10b2ebf36a686ec (2.6.39)
("Add __ip_vs_control_{init,cleanup}_sysctl()")
introduced regression due to wrong __net_init for
__ip_vs_control_cleanup_sysctl. This leads to crash when
the ip_vs module is unloaded.

	Fix it by changing __net_init to __net_exit for
the function that is already renamed to ip_vs_control_net_cleanup_sysctl.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b3afe189af6..376d2b12d58 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3680,7 +3680,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	return 0;
 }
 
-void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net)
+void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -3692,7 +3692,7 @@ void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net)
 #else
 
 int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
-void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { }
+void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
 
 #endif
 
-- 
cgit v1.2.3


From 62ad6fcd743792bf294f2a7ba26ab8f462065150 Mon Sep 17 00:00:00 2001
From: Shan Wei <davidshan@tencent.com>
Date: Tue, 24 Apr 2012 18:15:41 +0000
Subject: udp_diag: implement idiag_get_info for udp/udplite to get queue
 information

When we use netlink to monitor queue information for udp socket,
idiag_rqueue and idiag_wqueue of inet_diag_msg are returned with 0.

Keep consistent with netstat, just return back allocated rmem/wmem size.

Signed-off-by: Shan Wei <davidshan@tencent.com>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 2 +-
 net/ipv4/udp_diag.c  | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8d25a1c557e..8f8db724bfa 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -141,7 +141,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			goto rtattr_failure;
 
 	if (icsk == NULL) {
-		r->idiag_rqueue = r->idiag_wqueue = 0;
+		handler->idiag_get_info(sk, r, NULL);
 		goto out;
 	}
 
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 8a949f19deb..a7f86a3cd50 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -146,9 +146,17 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 	return udp_dump_one(&udp_table, in_skb, nlh, req);
 }
 
+static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+		void *info)
+{
+	r->idiag_rqueue = sk_rmem_alloc_get(sk);
+	r->idiag_wqueue = sk_wmem_alloc_get(sk);
+}
+
 static const struct inet_diag_handler udp_diag_handler = {
 	.dump		 = udp_diag_dump,
 	.dump_one	 = udp_diag_dump_one,
+	.idiag_get_info  = udp_diag_get_info,
 	.idiag_type	 = IPPROTO_UDP,
 };
 
@@ -167,6 +175,7 @@ static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *
 static const struct inet_diag_handler udplite_diag_handler = {
 	.dump		 = udplite_diag_dump,
 	.dump_one	 = udplite_diag_dump_one,
+	.idiag_get_info  = udp_diag_get_info,
 	.idiag_type	 = IPPROTO_UDPLITE,
 };
 
-- 
cgit v1.2.3


From 8d08d71ce59438a6ef06be5db07966e0c144b74e Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Wed, 25 Apr 2012 00:29:59 +0300
Subject: ipvs: add check in ftp for initialized core

	Avoid crash when registering ip_vs_ftp after
the IPVS core initialization for netns fails. Do this by
checking for present core (net->ipvs).

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_ftp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 538d74ee4f6..e39f693dd3e 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -439,6 +439,8 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
 	struct ip_vs_app *app;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	if (!ipvs)
+		return -ENOENT;
 	app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL);
 	if (!app)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 39f618b4fd95ae243d940ec64c961009c74e3333 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Wed, 25 Apr 2012 00:29:58 +0300
Subject: ipvs: reset ipvs pointer in netns

	Make sure net->ipvs is reset on netns cleanup or failed
initialization. It is needed for IPVS applications to know that
IPVS core is not loaded in netns.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2555816e778..260b9ef8877 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1924,6 +1924,7 @@ protocol_fail:
 control_fail:
 	ip_vs_estimator_net_cleanup(net);
 estimator_fail:
+	net->ipvs = NULL;
 	return -ENOMEM;
 }
 
@@ -1936,6 +1937,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net)
 	ip_vs_control_net_cleanup(net);
 	ip_vs_estimator_net_cleanup(net);
 	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	net->ipvs = NULL;
 }
 
 static void __net_exit __ip_vs_dev_cleanup(struct net *net)
-- 
cgit v1.2.3


From 0848e4043014631d792a66266d6d7d64a7f21da5 Mon Sep 17 00:00:00 2001
From: "alex.bluesman.smirnov@gmail.com" <alex.bluesman.smirnov@gmail.com>
Date: Wed, 25 Apr 2012 23:24:56 +0000
Subject: 6lowpan: fix segmentation fault caused by mlme request

Add nescesary mlme callbacks to satisfy "iz list" request from user space.
Due to 6lowpan device doesn't have its own phy, mlme implemented as a pipe
to a real phy to which 6lowpan is attached.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 36851588536..f6b169439b5 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1044,6 +1044,24 @@ static void lowpan_dev_free(struct net_device *dev)
 	free_netdev(dev);
 }
 
+static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
+{
+	struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
+	return ieee802154_mlme_ops(real_dev)->get_phy(real_dev);
+}
+
+static u16 lowpan_get_pan_id(const struct net_device *dev)
+{
+	struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
+	return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev);
+}
+
+static u16 lowpan_get_short_addr(const struct net_device *dev)
+{
+	struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
+	return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
+}
+
 static struct header_ops lowpan_header_ops = {
 	.create	= lowpan_header_create,
 };
@@ -1053,6 +1071,12 @@ static const struct net_device_ops lowpan_netdev_ops = {
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
+static struct ieee802154_mlme_ops lowpan_mlme = {
+	.get_pan_id = lowpan_get_pan_id,
+	.get_phy = lowpan_get_phy,
+	.get_short_addr = lowpan_get_short_addr,
+};
+
 static void lowpan_setup(struct net_device *dev)
 {
 	pr_debug("(%s)\n", __func__);
@@ -1070,6 +1094,7 @@ static void lowpan_setup(struct net_device *dev)
 
 	dev->netdev_ops		= &lowpan_netdev_ops;
 	dev->header_ops		= &lowpan_header_ops;
+	dev->ml_priv		= &lowpan_mlme;
 	dev->destructor		= lowpan_dev_free;
 }
 
-- 
cgit v1.2.3


From 8deff4af8745561efd2be34ee30cc57a6f0107c6 Mon Sep 17 00:00:00 2001
From: "alex.bluesman.smirnov@gmail.com" <alex.bluesman.smirnov@gmail.com>
Date: Wed, 25 Apr 2012 23:24:57 +0000
Subject: 6lowpan: clean up fragments list if module unloaded

Clean all the pending fragments and relative timers if 6lowpan link
is going to be deleted.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index f6b169439b5..b3021f76520 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1177,11 +1177,20 @@ static void lowpan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev);
 	struct net_device *real_dev = lowpan_dev->real_dev;
-	struct lowpan_dev_record *entry;
-	struct lowpan_dev_record *tmp;
+	struct lowpan_dev_record *entry, *tmp;
+	struct lowpan_fragment *frame, *tframe;
 
 	ASSERT_RTNL();
 
+	spin_lock(&flist_lock);
+	list_for_each_entry_safe(frame, tframe, &lowpan_fragments, list) {
+		del_timer(&frame->timer);
+		list_del(&frame->list);
+		dev_kfree_skb(frame->skb);
+		kfree(frame);
+	}
+	spin_unlock(&flist_lock);
+
 	mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
 	list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) {
 		if (entry->ldev == dev) {
-- 
cgit v1.2.3


From 768f7c7c121e80f458a9d013b2e8b169e5dfb1e5 Mon Sep 17 00:00:00 2001
From: "alex.bluesman.smirnov@gmail.com" <alex.bluesman.smirnov@gmail.com>
Date: Wed, 25 Apr 2012 23:24:58 +0000
Subject: 6lowpan: add missing spin_lock_init()

Add missing spin_lock_init() for frames list lock.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index b3021f76520..840821b90bc 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1168,6 +1168,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 	list_add_tail(&entry->list, &lowpan_devices);
 	mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
 
+	spin_lock_init(&flist_lock);
+
 	register_netdevice(dev);
 
 	return 0;
-- 
cgit v1.2.3


From 651913ce9de2bbcedef608c5d6cf39c244248509 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 27 Apr 2012 11:29:37 -0400
Subject: tcp: clean up use of jiffies in tcp_rcv_rtt_measure()

Clean up a reference to jiffies in tcp_rcv_rtt_measure() that should
instead reference tcp_time_stamp. Since the result of the subtraction
is passed into a function taking u32, this should not change any
behavior (and indeed the generated assembly does not change on
x86_64). However, it seems worth cleaning this up for consistency and
clarity (and perhaps to avoid bugs if this is copied and pasted
somewhere else).

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ff36406537..2a702e3a4ae 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -495,7 +495,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 		goto new_measure;
 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 		return;
-	tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
+	tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
 
 new_measure:
 	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
-- 
cgit v1.2.3


From a4dff1bc492ee4a2184d384ae8b5bcab5859e150 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 20 Apr 2012 18:11:02 +0400
Subject: SUNRPC: skip dead but not buried clients on PipeFS events

These clients can't be safely dereferenced if their counter in 0.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 67972462a54..d10ebc4310f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -218,7 +218,8 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
 		if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
 		    ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
 			continue;
-		atomic_inc(&clnt->cl_count);
+		if (atomic_inc_not_zero(&clnt->cl_count) == 0)
+			continue;
 		spin_unlock(&sn->rpc_client_lock);
 		return clnt;
 	}
-- 
cgit v1.2.3


From 7aab449e5a2ebfa9c5116e87e16536bc4195e4de Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 20 Apr 2012 18:19:18 +0400
Subject: SUNRPC: skip clients with program without PipeFS entries

1) This is sane.
2) Otherwise there will be soft lockup:

do {
	rpc_get_client_for_event (clnt->cl_dentry == NULL ==> choose)
	__rpc_pipefs_event (clnt->cl_program->pipe_dir_name == NULL ==> return)
} while (1)

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d10ebc4310f..8a19849ff34 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -184,8 +184,6 @@ static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
 
 	switch (event) {
 	case RPC_PIPEFS_MOUNT:
-		if (clnt->cl_program->pipe_dir_name == NULL)
-			break;
 		dentry = rpc_setup_pipedir_sb(sb, clnt,
 					      clnt->cl_program->pipe_dir_name);
 		BUG_ON(dentry == NULL);
@@ -215,6 +213,8 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
 
 	spin_lock(&sn->rpc_client_lock);
 	list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
+		if (clnt->cl_program->pipe_dir_name == NULL)
+			break;
 		if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
 		    ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
 			continue;
-- 
cgit v1.2.3


From 37629b572cc4e80fc24b4139a24df1a89415d534 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 20 Apr 2012 18:19:56 +0400
Subject: SUNRPC: set per-net PipeFS superblock before notification

There can be a case, when on MOUNT event RPC client (after it's dentries were
created) is not longer hold by anyone except notification callback.
I.e. on release this client will be destoroyed. And it's dentries have to be
destroyed as well. Which in turn requires per-net PipeFS superblock to be set.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpc_pipe.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0af37fc4681..3b62cf28803 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1126,19 +1126,20 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 	dprintk("RPC:	sending pipefs MOUNT notification for net %p%s\n", net,
 								NET_NAME(net));
+	sn->pipefs_sb = sb;
 	err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_MOUNT,
 					   sb);
 	if (err)
 		goto err_depopulate;
 	sb->s_fs_info = get_net(net);
-	sn->pipefs_sb = sb;
 	return 0;
 
 err_depopulate:
 	blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_UMOUNT,
 					   sb);
+	sn->pipefs_sb = NULL;
 	__rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
 	return err;
 }
-- 
cgit v1.2.3


From ea8cfa06795bb30d2ea61f503ef129284492c06a Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 27 Apr 2012 13:00:17 +0400
Subject: SUNRPC: traverse clients tree on PipeFS event

v2: recursion was replaced by loop

If client is a clone, then it's parent can not be in the list.
But parent's Pipefs dentries have to be created and destroyed.

Note: event skip helper for clients introduced

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8a19849ff34..d127bd74752 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -176,8 +176,16 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name)
 	return 0;
 }
 
-static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
-				struct super_block *sb)
+static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
+{
+	if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
+	    ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
+		return 1;
+	return 0;
+}
+
+static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
+				   struct super_block *sb)
 {
 	struct dentry *dentry;
 	int err = 0;
@@ -206,6 +214,20 @@ static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
 	return err;
 }
 
+static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
+				struct super_block *sb)
+{
+	int error = 0;
+
+	for (;; clnt = clnt->cl_parent) {
+		if (!rpc_clnt_skip_event(clnt, event))
+			error = __rpc_clnt_handle_event(clnt, event, sb);
+		if (error || clnt == clnt->cl_parent)
+			break;
+	}
+	return error;
+}
+
 static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
 {
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
@@ -215,8 +237,7 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
 	list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
 		if (clnt->cl_program->pipe_dir_name == NULL)
 			break;
-		if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
-		    ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
+		if (rpc_clnt_skip_event(clnt, event))
 			continue;
 		if (atomic_inc_not_zero(&clnt->cl_count) == 0)
 			continue;
-- 
cgit v1.2.3


From cde2e9a651b76d8db36ae94cd0febc82b637e5dd Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 27 Apr 2012 10:11:48 +0000
Subject: drop_monitor: fix sleeping in invalid context warning

Eric Dumazet pointed out this warning in the drop_monitor protocol to me:

[   38.352571] BUG: sleeping function called from invalid context at kernel/mutex.c:85
[   38.352576] in_atomic(): 1, irqs_disabled(): 0, pid: 4415, name: dropwatch
[   38.352580] Pid: 4415, comm: dropwatch Not tainted 3.4.0-rc2+ #71
[   38.352582] Call Trace:
[   38.352592]  [<ffffffff8153aaf0>] ? trace_napi_poll_hit+0xd0/0xd0
[   38.352599]  [<ffffffff81063f2a>] __might_sleep+0xca/0xf0
[   38.352606]  [<ffffffff81655b16>] mutex_lock+0x26/0x50
[   38.352610]  [<ffffffff8153aaf0>] ? trace_napi_poll_hit+0xd0/0xd0
[   38.352616]  [<ffffffff810b72d9>] tracepoint_probe_register+0x29/0x90
[   38.352621]  [<ffffffff8153a585>] set_all_monitor_traces+0x105/0x170
[   38.352625]  [<ffffffff8153a8ca>] net_dm_cmd_trace+0x2a/0x40
[   38.352630]  [<ffffffff8154a81a>] genl_rcv_msg+0x21a/0x2b0
[   38.352636]  [<ffffffff810f8029>] ? zone_statistics+0x99/0xc0
[   38.352640]  [<ffffffff8154a600>] ? genl_rcv+0x30/0x30
[   38.352645]  [<ffffffff8154a059>] netlink_rcv_skb+0xa9/0xd0
[   38.352649]  [<ffffffff8154a5f0>] genl_rcv+0x20/0x30
[   38.352653]  [<ffffffff81549a7e>] netlink_unicast+0x1ae/0x1f0
[   38.352658]  [<ffffffff81549d76>] netlink_sendmsg+0x2b6/0x310
[   38.352663]  [<ffffffff8150824f>] sock_sendmsg+0x10f/0x130
[   38.352668]  [<ffffffff8150abe0>] ? move_addr_to_kernel+0x60/0xb0
[   38.352673]  [<ffffffff81515f04>] ? verify_iovec+0x64/0xe0
[   38.352677]  [<ffffffff81509c46>] __sys_sendmsg+0x386/0x390
[   38.352682]  [<ffffffff810ffaf9>] ? handle_mm_fault+0x139/0x210
[   38.352687]  [<ffffffff8165b5bc>] ? do_page_fault+0x1ec/0x4f0
[   38.352693]  [<ffffffff8106ba4d>] ? set_next_entity+0x9d/0xb0
[   38.352699]  [<ffffffff81310b49>] ? tty_ldisc_deref+0x9/0x10
[   38.352703]  [<ffffffff8106d363>] ? pick_next_task_fair+0x63/0x140
[   38.352708]  [<ffffffff8150b8d4>] sys_sendmsg+0x44/0x80
[   38.352713]  [<ffffffff8165f8e2>] system_call_fastpath+0x16/0x1b

It stems from holding a spinlock (trace_state_lock) while attempting to register
or unregister tracepoint hooks, making in_atomic() true in this context, leading
to the warning when the tracepoint calls might_sleep() while its taking a mutex.
Since we only use the trace_state_lock to prevent trace protocol state races, as
well as hardware stat list updates on an rcu write side, we can just convert the
spinlock to a mutex to avoid this problem.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 5c3c81a609e..a221a5bbecf 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -42,7 +42,7 @@ static void send_dm_alert(struct work_struct *unused);
  * netlink alerts
  */
 static int trace_state = TRACE_OFF;
-static DEFINE_SPINLOCK(trace_state_lock);
+static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
 	struct work_struct dm_alert_work;
@@ -214,7 +214,7 @@ static int set_all_monitor_traces(int state)
 	struct dm_hw_stat_delta *new_stat = NULL;
 	struct dm_hw_stat_delta *temp;
 
-	spin_lock(&trace_state_lock);
+	mutex_lock(&trace_state_mutex);
 
 	if (state == trace_state) {
 		rc = -EAGAIN;
@@ -253,7 +253,7 @@ static int set_all_monitor_traces(int state)
 		rc = -EINPROGRESS;
 
 out_unlock:
-	spin_unlock(&trace_state_lock);
+	mutex_unlock(&trace_state_mutex);
 
 	return rc;
 }
@@ -296,12 +296,12 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 
 		new_stat->dev = dev;
 		new_stat->last_rx = jiffies;
-		spin_lock(&trace_state_lock);
+		mutex_lock(&trace_state_mutex);
 		list_add_rcu(&new_stat->list, &hw_stats_list);
-		spin_unlock(&trace_state_lock);
+		mutex_unlock(&trace_state_mutex);
 		break;
 	case NETDEV_UNREGISTER:
-		spin_lock(&trace_state_lock);
+		mutex_lock(&trace_state_mutex);
 		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
 			if (new_stat->dev == dev) {
 				new_stat->dev = NULL;
@@ -312,7 +312,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 				}
 			}
 		}
-		spin_unlock(&trace_state_lock);
+		mutex_unlock(&trace_state_mutex);
 		break;
 	}
 out:
-- 
cgit v1.2.3


From 3885ca785a3618593226687ced84f3f336dc3860 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 27 Apr 2012 10:11:49 +0000
Subject: drop_monitor: Make updating data->skb smp safe

Eric Dumazet pointed out to me that the drop_monitor protocol has some holes in
its smp protections.  Specifically, its possible to replace data->skb while its
being written.  This patch corrects that by making data->skb an rcu protected
variable.  That will prevent it from being overwritten while a tracepoint is
modifying it.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 70 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index a221a5bbecf..7592943513e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -46,7 +46,7 @@ static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
 	struct work_struct dm_alert_work;
-	struct sk_buff *skb;
+	struct sk_buff __rcu *skb;
 	atomic_t dm_hit_count;
 	struct timer_list send_timer;
 };
@@ -73,35 +73,58 @@ static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
+static int initialized = 0;
 
 static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
 	struct net_dm_alert_msg *msg;
 	struct nlattr *nla;
+	struct sk_buff *skb;
+	struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
 
 	al = sizeof(struct net_dm_alert_msg);
 	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
 	al += sizeof(struct nlattr);
 
-	data->skb = genlmsg_new(al, GFP_KERNEL);
-	genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
-			0, NET_DM_CMD_ALERT);
-	nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
-	msg = nla_data(nla);
-	memset(msg, 0, al);
-	atomic_set(&data->dm_hit_count, dm_hit_limit);
+	skb = genlmsg_new(al, GFP_KERNEL);
+
+	if (skb) {
+		genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
+				0, NET_DM_CMD_ALERT);
+		nla = nla_reserve(skb, NLA_UNSPEC,
+				  sizeof(struct net_dm_alert_msg));
+		msg = nla_data(nla);
+		memset(msg, 0, al);
+	} else if (initialized)
+		schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+
+	/*
+	 * Don't need to lock this, since we are guaranteed to only
+	 * run this on a single cpu at a time.
+	 * Note also that we only update data->skb if the old and new skb
+	 * pointers don't match.  This ensures that we don't continually call
+	 * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
+	 */
+	if (skb != oskb) {
+		rcu_assign_pointer(data->skb, skb);
+
+		synchronize_rcu();
+
+		atomic_set(&data->dm_hit_count, dm_hit_limit);
+	}
+
 }
 
 static void send_dm_alert(struct work_struct *unused)
 {
 	struct sk_buff *skb;
-	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
 
 	/*
 	 * Grab the skb we're about to send
 	 */
-	skb = data->skb;
+	skb = rcu_dereference_protected(data->skb, 1);
 
 	/*
 	 * Replace it with a new one
@@ -111,8 +134,10 @@ static void send_dm_alert(struct work_struct *unused)
 	/*
 	 * Ship it!
 	 */
-	genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+	if (skb)
+		genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
 
+	put_cpu_var(dm_cpu_data);
 }
 
 /*
@@ -123,9 +148,11 @@ static void send_dm_alert(struct work_struct *unused)
  */
 static void sched_send_work(unsigned long unused)
 {
-	struct per_cpu_dm_data *data =  &__get_cpu_var(dm_cpu_data);
+	struct per_cpu_dm_data *data =  &get_cpu_var(dm_cpu_data);
+
+	schedule_work_on(smp_processor_id(), &data->dm_alert_work);
 
-	schedule_work(&data->dm_alert_work);
+	put_cpu_var(dm_cpu_data);
 }
 
 static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -134,9 +161,16 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	struct nlmsghdr *nlh;
 	struct nlattr *nla;
 	int i;
-	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+	struct sk_buff *dskb;
+	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
 
 
+	rcu_read_lock();
+	dskb = rcu_dereference(data->skb);
+
+	if (!dskb)
+		goto out;
+
 	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
 		/*
 		 * we're already at zero, discard this hit
@@ -144,7 +178,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 		goto out;
 	}
 
-	nlh = (struct nlmsghdr *)data->skb->data;
+	nlh = (struct nlmsghdr *)dskb->data;
 	nla = genlmsg_data(nlmsg_data(nlh));
 	msg = nla_data(nla);
 	for (i = 0; i < msg->entries; i++) {
@@ -158,7 +192,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	/*
 	 * We need to create a new entry
 	 */
-	__nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+	__nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
 	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
 	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
 	msg->points[msg->entries].count = 1;
@@ -170,6 +204,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	}
 
 out:
+	rcu_read_unlock();
+	put_cpu_var(dm_cpu_data);
 	return;
 }
 
@@ -375,6 +411,8 @@ static int __init init_net_drop_monitor(void)
 		data->send_timer.function = sched_send_work;
 	}
 
+	initialized = 1;
+
 	goto out;
 
 out_unreg:
-- 
cgit v1.2.3


From 4b984cd50bc1b6d492175cd77bfabb78e76ffa67 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Date: Thu, 26 Apr 2012 09:45:34 +0200
Subject: ipvs: null check of net->ipvs in lblc(r) shedulers

Avoid crash when registering shedulers after
the IPVS core initialization for netns fails. Do this by
checking for present core (net->ipvs).

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_lblc.c  | 3 +++
 net/netfilter/ipvs/ip_vs_lblcr.c | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 0f16283fd05..caa43704e55 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -551,6 +551,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	if (!ipvs)
+		return -ENOENT;
+
 	if (!net_eq(net, &init_net)) {
 		ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
 						sizeof(vs_vars_table),
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index eec797f8cce..548bf37aa29 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -745,6 +745,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	if (!ipvs)
+		return -ENOENT;
+
 	if (!net_eq(net, &init_net)) {
 		ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
 						sizeof(vs_vars_table),
-- 
cgit v1.2.3


From 582b8e3eadaec77788c1aa188081a8d5059c42a6 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Date: Thu, 26 Apr 2012 09:45:35 +0200
Subject: ipvs: take care of return value from protocol init_netns

ip_vs_create_timeout_table() can return NULL
All functions protocol init_netns is affected of this patch.

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h                   |  2 +-
 net/netfilter/ipvs/ip_vs_proto.c      | 11 +++++++++--
 net/netfilter/ipvs/ip_vs_proto_sctp.c |  5 ++++-
 net/netfilter/ipvs/ip_vs_proto_tcp.c  |  5 ++++-
 net/netfilter/ipvs/ip_vs_proto_udp.c  |  5 ++++-
 5 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 2bdee51ba30..6d90dda2ddb 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -393,7 +393,7 @@ struct ip_vs_protocol {
 
 	void (*exit)(struct ip_vs_protocol *pp);
 
-	void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+	int (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
 
 	void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
 
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index a62360e2903..ed835e67a07 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -78,8 +78,15 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
 	ipvs->proto_data_table[hash] = pd;
 	atomic_set(&pd->appcnt, 0);	/* Init app counter */
 
-	if (pp->init_netns != NULL)
-		pp->init_netns(net, pd);
+	if (pp->init_netns != NULL) {
+		int ret = pp->init_netns(net, pd);
+		if (ret) {
+			/* unlink an free proto data */
+			ipvs->proto_data_table[hash] = pd->next;
+			kfree(pd);
+			return ret;
+		}
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1fbf7a2816f..9f3fb751c49 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1090,7 +1090,7 @@ out:
  *   timeouts is netns related now.
  * ---------------------------------------------
  */
-static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -1098,6 +1098,9 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 	spin_lock_init(&ipvs->sctp_app_lock);
 	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
 							sizeof(sctp_timeouts));
+	if (!pd->timeout_table)
+		return -ENOMEM;
+	return 0;
 }
 
 static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index ef8641f7af8..cd609cc6272 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -677,7 +677,7 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
  *   timeouts is netns related now.
  * ---------------------------------------------
  */
-static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -685,7 +685,10 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
 	spin_lock_init(&ipvs->tcp_app_lock);
 	pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
 							sizeof(tcp_timeouts));
+	if (!pd->timeout_table)
+		return -ENOMEM;
 	pd->tcp_state_table =  tcp_states;
+	return 0;
 }
 
 static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index f4b7262896b..2fedb2dcb3d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -467,7 +467,7 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
 	cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 }
 
-static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -475,6 +475,9 @@ static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 	spin_lock_init(&ipvs->udp_app_lock);
 	pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
 							sizeof(udp_timeouts));
+	if (!pd->timeout_table)
+		return -ENOMEM;
+	return 0;
 }
 
 static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
-- 
cgit v1.2.3


From 8537de8a7ab6681cc72fb0411ab1ba7fdba62dd0 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Date: Thu, 26 Apr 2012 07:47:44 +0200
Subject: ipvs: kernel oops - do_ip_vs_get_ctl

Change order of init so netns init is ready
when register ioctl and netlink.

Ver2
	Whitespace fixes and __init added.

Reported-by: "Ryan O'Hara" <rohara@redhat.com>
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             |  2 ++
 net/netfilter/ipvs/ip_vs_core.c |  9 +++++++
 net/netfilter/ipvs/ip_vs_ctl.c  | 52 ++++++++++++++++++++++++-----------------
 3 files changed, 41 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 6d90dda2ddb..72522f08737 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1203,6 +1203,8 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 
 extern int ip_vs_use_count_inc(void);
 extern void ip_vs_use_count_dec(void);
+extern int ip_vs_register_nl_ioctl(void);
+extern void ip_vs_unregister_nl_ioctl(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 260b9ef8877..00bdb1d9d69 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1995,10 +1995,18 @@ static int __init ip_vs_init(void)
 		goto cleanup_dev;
 	}
 
+	ret = ip_vs_register_nl_ioctl();
+	if (ret < 0) {
+		pr_err("can't register netlink/ioctl.\n");
+		goto cleanup_hooks;
+	}
+
 	pr_info("ipvs loaded.\n");
 
 	return ret;
 
+cleanup_hooks:
+	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 cleanup_dev:
 	unregister_pernet_device(&ipvs_core_dev_ops);
 cleanup_sub:
@@ -2014,6 +2022,7 @@ exit:
 
 static void __exit ip_vs_cleanup(void)
 {
+	ip_vs_unregister_nl_ioctl();
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	unregister_pernet_device(&ipvs_core_dev_ops);
 	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 376d2b12d58..f5589987fc8 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3750,21 +3750,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
 	free_percpu(ipvs->tot_stats.cpustats);
 }
 
-int __init ip_vs_control_init(void)
+int __init ip_vs_register_nl_ioctl(void)
 {
-	int idx;
 	int ret;
 
-	EnterFunction(2);
-
-	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
-	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
-		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
-		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
-	}
-
-	smp_wmb();	/* Do we really need it now ? */
-
 	ret = nf_register_sockopt(&ip_vs_sockopts);
 	if (ret) {
 		pr_err("cannot register sockopt.\n");
@@ -3776,28 +3765,47 @@ int __init ip_vs_control_init(void)
 		pr_err("cannot register Generic Netlink interface.\n");
 		goto err_genl;
 	}
-
-	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
-	if (ret < 0)
-		goto err_notf;
-
-	LeaveFunction(2);
 	return 0;
 
-err_notf:
-	ip_vs_genl_unregister();
 err_genl:
 	nf_unregister_sockopt(&ip_vs_sockopts);
 err_sock:
 	return ret;
 }
 
+void ip_vs_unregister_nl_ioctl(void)
+{
+	ip_vs_genl_unregister();
+	nf_unregister_sockopt(&ip_vs_sockopts);
+}
+
+int __init ip_vs_control_init(void)
+{
+	int idx;
+	int ret;
+
+	EnterFunction(2);
+
+	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
+		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+	}
+
+	smp_wmb();	/* Do we really need it now ? */
+
+	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+	if (ret < 0)
+		return ret;
+
+	LeaveFunction(2);
+	return 0;
+}
+
 
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
 	unregister_netdevice_notifier(&ip_vs_dst_notifier);
-	ip_vs_genl_unregister();
-	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
 }
-- 
cgit v1.2.3


From 6cf51852486af3d79f57bf46d00209a14244dbaa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 27 Apr 2012 02:00:50 +0200
Subject: netfilter: xt_CT: fix wrong checking in the timeout assignment path

The current checking always succeeded. We have to check the first
character of the string to check that it's empty, thus, skipping
the timeout path.

This fixes the use of the CT target without the timeout option.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_CT.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 59530e93fa5..3746d8b9a47 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -227,7 +227,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	if (info->timeout) {
+	if (info->timeout[0]) {
 		typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
 		struct nf_conn_timeout *timeout_ext;
 
-- 
cgit v1.2.3


From cbbb34498f8b2b26cbdc79532c8a2ee5cd1e756a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 30 Apr 2012 11:52:40 -0400
Subject: SUNRPC: RPC client must use the current utsname hostname string

Now that the rpc client is namespace aware, it needs to use the
utsname of the process that created it instead of using the
init_utsname. Both rpc_new_client and rpc_clone_client need to
be fixed.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 net/sunrpc/clnt.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d127bd74752..adf2990aceb 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -279,6 +279,14 @@ void rpc_clients_notifier_unregister(void)
 	return rpc_pipefs_notifier_unregister(&rpc_clients_block);
 }
 
+static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
+{
+	clnt->cl_nodelen = strlen(nodename);
+	if (clnt->cl_nodelen > UNX_MAXNODENAME)
+		clnt->cl_nodelen = UNX_MAXNODENAME;
+	memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen);
+}
+
 static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
 {
 	const struct rpc_program *program = args->program;
@@ -359,10 +367,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 	}
 
 	/* save the nodename */
-	clnt->cl_nodelen = strlen(init_utsname()->nodename);
-	if (clnt->cl_nodelen > UNX_MAXNODENAME)
-		clnt->cl_nodelen = UNX_MAXNODENAME;
-	memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen);
+	rpc_clnt_set_nodename(clnt, utsname()->nodename);
 	rpc_register_client(clnt);
 	return clnt;
 
@@ -521,6 +526,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
 	if (err != 0)
 		goto out_no_path;
+	rpc_clnt_set_nodename(new, utsname()->nodename);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
 	atomic_inc(&clnt->cl_count);
-- 
cgit v1.2.3


From 1cebce36d660c83bd1353e41f3e66abd4686f215 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 30 Apr 2012 06:00:18 +0000
Subject: tcp: fix infinite cwnd in tcp_complete_cwr()

When the cwnd reduction is done, ssthresh may be infinite
if TCP enters CWR via ECN or F-RTO. If cwnd is not undone, i.e.,
undo_marker is set, tcp_complete_cwr() falsely set cwnd to the
infinite ssthresh value. The correct operation is to keep cwnd
intact because it has been updated in ECN or F-RTO.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2a702e3a4ae..d99efd7dfb6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2868,11 +2868,14 @@ static inline void tcp_complete_cwr(struct sock *sk)
 
 	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
 	if (tp->undo_marker) {
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
 			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-		else /* PRR */
+			tp->snd_cwnd_stamp = tcp_time_stamp;
+		} else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
+			/* PRR algorithm. */
 			tp->snd_cwnd = tp->snd_ssthresh;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
+			tp->snd_cwnd_stamp = tcp_time_stamp;
+		}
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
-- 
cgit v1.2.3


From 66f2c99af3d6f2d0aa1120884cf1c60613ef61c0 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 29 Apr 2012 15:44:16 +0200
Subject: mac80211: fix AP mode EAP tx for VLAN stations

EAP frames for stations in an AP VLAN are sent on the main AP interface
to avoid race conditions wrt. moving stations.
For that to work properly, sta_info_get_bss must be used instead of
sta_info_get when sending EAP packets.
Previously this was only done for cooked monitor injected packets, so
this patch adds a check for tx->skb->protocol to the same place.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Cc: stable@vger.kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 782a60198df..e76facc69e9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1158,7 +1158,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 		tx->sta = rcu_dereference(sdata->u.vlan.sta);
 		if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr)
 			return TX_DROP;
-	} else if (info->flags & IEEE80211_TX_CTL_INJECTED) {
+	} else if (info->flags & IEEE80211_TX_CTL_INJECTED ||
+		   tx->sdata->control_port_protocol == tx->skb->protocol) {
 		tx->sta = sta_info_get_bss(sdata, hdr->addr1);
 	}
 	if (!tx->sta)
-- 
cgit v1.2.3


From 116a0fc31c6c9b8fc821be5a96e5bf0b43260131 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Apr 2012 09:08:22 +0000
Subject: netem: fix possible skb leak

skb_checksum_help(skb) can return an error, we must free skb in this
case. qdisc_drop(skb, sch) can also be feeded with a NULL skb (if
skb_unshare() failed), so lets use this generic helper.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5da548fa7ae..ebd22966f74 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -408,10 +408,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
 		    (skb->ip_summed == CHECKSUM_PARTIAL &&
-		     skb_checksum_help(skb))) {
-			sch->qstats.drops++;
-			return NET_XMIT_DROP;
-		}
+		     skb_checksum_help(skb)))
+			return qdisc_drop(skb, sch);
 
 		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
 	}
-- 
cgit v1.2.3


From 4fdcfa12843bca38d0c9deff70c8720e4e8f515f Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 1 May 2012 08:18:02 +0000
Subject: drop_monitor: prevent init path from scheduling on the wrong cpu

I just noticed after some recent updates, that the init path for the drop
monitor protocol has a minor error.  drop monitor maintains a per cpu structure,
that gets initalized from a single cpu.  Normally this is fine, as the protocol
isn't in use yet, but I recently made a change that causes a failed skb
allocation to reschedule itself .  Given the current code, the implication is
that this workqueue reschedule will take place on the wrong cpu.  If drop
monitor is used early during the boot process, its possible that two cpus will
access a single per-cpu structure in parallel, possibly leading to data
corruption.

This patch fixes the situation, by storing the cpu number that a given instance
of this per-cpu data should be accessed from.  In the case of a need for a
reschedule, the cpu stored in the struct is assigned the rescheule, rather than
the currently executing cpu

Tested successfully by myself.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 7592943513e..a7cad741df0 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -49,6 +49,7 @@ struct per_cpu_dm_data {
 	struct sk_buff __rcu *skb;
 	atomic_t dm_hit_count;
 	struct timer_list send_timer;
+	int cpu;
 };
 
 struct dm_hw_stat_delta {
@@ -73,7 +74,6 @@ static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
-static int initialized = 0;
 
 static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
@@ -96,8 +96,8 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 				  sizeof(struct net_dm_alert_msg));
 		msg = nla_data(nla);
 		memset(msg, 0, al);
-	} else if (initialized)
-		schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+	} else
+		schedule_work_on(data->cpu, &data->dm_alert_work);
 
 	/*
 	 * Don't need to lock this, since we are guaranteed to only
@@ -121,6 +121,8 @@ static void send_dm_alert(struct work_struct *unused)
 	struct sk_buff *skb;
 	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
 
+	WARN_ON_ONCE(data->cpu != smp_processor_id());
+
 	/*
 	 * Grab the skb we're about to send
 	 */
@@ -404,14 +406,14 @@ static int __init init_net_drop_monitor(void)
 
 	for_each_present_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
-		reset_per_cpu_data(data);
+		data->cpu = cpu;
 		INIT_WORK(&data->dm_alert_work, send_dm_alert);
 		init_timer(&data->send_timer);
 		data->send_timer.data = cpu;
 		data->send_timer.function = sched_send_work;
+		reset_per_cpu_data(data);
 	}
 
-	initialized = 1;
 
 	goto out;
 
-- 
cgit v1.2.3


From 84768edbb2721637620b2d84501bb0d5aed603f1 Mon Sep 17 00:00:00 2001
From: Sasha Levin <levinsasha928@gmail.com>
Date: Wed, 2 May 2012 03:58:43 +0000
Subject: net: l2tp: unlock socket lock before returning from l2tp_ip_sendmsg

l2tp_ip_sendmsg could return without releasing socket lock, making it all the
way to userspace, and generating the following warning:

[  130.891594] ================================================
[  130.894569] [ BUG: lock held when returning to user space! ]
[  130.897257] 3.4.0-rc5-next-20120501-sasha #104 Tainted: G        W
[  130.900336] ------------------------------------------------
[  130.902996] trinity/8384 is leaving the kernel with locks still held!
[  130.906106] 1 lock held by trinity/8384:
[  130.907924]  #0:  (sk_lock-AF_INET){+.+.+.}, at: [<ffffffff82b9503f>] l2tp_ip_sendmsg+0x2f/0x550

Introduced by commit 2f16270 ("l2tp: Fix locking in l2tp_ip.c").

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 585d93ecee2..6274f0be82b 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -442,8 +442,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 
 		daddr = lip->l2tp_addr.s_addr;
 	} else {
+		rc = -EDESTADDRREQ;
 		if (sk->sk_state != TCP_ESTABLISHED)
-			return -EDESTADDRREQ;
+			goto out;
 
 		daddr = inet->inet_daddr;
 		connected = 1;
-- 
cgit v1.2.3


From b49960a05e32121d29316cfdf653894b88ac9190 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 May 2012 02:28:41 +0000
Subject: tcp: change tcp_adv_win_scale and tcp_rmem[2]

tcp_adv_win_scale default value is 2, meaning we expect a good citizen
skb to have skb->len / skb->truesize ratio of 75% (3/4)

In 2.6 kernels we (mis)accounted for typical MSS=1460 frame :
1536 + 64 + 256 = 1856 'estimated truesize', and 1856 * 3/4 = 1392.
So these skbs were considered as not bloated.

With recent truesize fixes, a typical MSS=1460 frame truesize is now the
more precise :
2048 + 256 = 2304. But 2304 * 3/4 = 1728.
So these skb are not good citizen anymore, because 1460 < 1728

(GRO can escape this problem because it build skbs with a too low
truesize.)

This also means tcp advertises a too optimistic window for a given
allocated rcvspace : When receiving frames, sk_rmem_alloc can hit
sk_rcvbuf limit and we call tcp_prune_queue()/tcp_collapse() too often,
especially when application is slow to drain its receive queue or in
case of losses (netperf is fast, scp is slow). This is a major latency
source.

We should adjust the len/truesize ratio to 50% instead of 75%

This patch :

1) changes tcp_adv_win_scale default to 1 instead of 2

2) increase tcp_rmem[2] limit from 4MB to 6MB to take into account
better truesize tracking and to allow autotuning tcp receive window to
reach same value than before. Note that same amount of kernel memory is
consumed compared to 2.6 kernels.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 4 ++--
 net/ipv4/tcp.c                         | 9 +++++----
 net/ipv4/tcp_input.c                   | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index bd80ba5847d..1619a8c8087 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -147,7 +147,7 @@ tcp_adv_win_scale - INTEGER
 	(if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale),
 	if it is <= 0.
 	Possible values are [-31, 31], inclusive.
-	Default: 2
+	Default: 1
 
 tcp_allowed_congestion_control - STRING
 	Show/set the congestion control choices available to non-privileged
@@ -410,7 +410,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
 	net.core.rmem_max.  Calling setsockopt() with SO_RCVBUF disables
 	automatic tuning of that socket's receive buffer size, in which
 	case this value is ignored.
-	Default: between 87380B and 4MB, depending on RAM size.
+	Default: between 87380B and 6MB, depending on RAM size.
 
 tcp_sack - BOOLEAN
 	Enable select acknowledgments (SACKS).
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8bb6adeb62c..1272a88c2a6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3243,7 +3243,7 @@ void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
 	unsigned long limit;
-	int max_share, cnt;
+	int max_rshare, max_wshare, cnt;
 	unsigned int i;
 	unsigned long jiffy = jiffies;
 
@@ -3303,15 +3303,16 @@ void __init tcp_init(void)
 	tcp_init_mem(&init_net);
 	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
-	max_share = min(4UL*1024*1024, limit);
+	max_wshare = min(4UL*1024*1024, limit);
+	max_rshare = min(6UL*1024*1024, limit);
 
 	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
 	sysctl_tcp_wmem[1] = 16*1024;
-	sysctl_tcp_wmem[2] = max(64*1024, max_share);
+	sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
 	sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
 	sysctl_tcp_rmem[1] = 87380;
-	sysctl_tcp_rmem[2] = max(87380, max_share);
+	sysctl_tcp_rmem[2] = max(87380, max_rshare);
 
 	pr_info("Hash tables configured (established %u bind %u)\n",
 		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d99efd7dfb6..257b61789ee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,7 +85,7 @@ int sysctl_tcp_ecn __read_mostly = 2;
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 2;
+int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 
 int sysctl_tcp_stdurg __read_mostly;
-- 
cgit v1.2.3