From 8759fec4af222f338d08f8f1a7ad6a77ca6cb301 Mon Sep 17 00:00:00 2001
From: Romain Perier <romain.perier@free-electrons.com>
Date: Wed, 14 Dec 2016 15:15:07 +0100
Subject: crypto: marvell - Copy IVDIG before launching partial DMA ahash
 requests

Currently, inner IV/DIGEST data are only copied once into the hash
engines and not set explicitly before launching a request that is not a
first frag. This is an issue especially when multiple ahash reqs are
computed in parallel or chained with cipher request, as the state of the
request being computed is not updated into the hash engine. It leads to
non-deterministic corrupted digest results.

Fixes: commit 2786cee8e50b ("crypto: marvell - Move SRAM I/O operations to step functions")
Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/cesa.h |  3 ++-
 drivers/crypto/marvell/hash.c | 34 +++++++++++++++++++++++++++++++++-
 drivers/crypto/marvell/tdma.c |  9 ++++++++-
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index a768da7138a1..b7872f62f674 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -273,7 +273,8 @@ struct mv_cesa_op_ctx {
 #define CESA_TDMA_SRC_IN_SRAM			BIT(30)
 #define CESA_TDMA_END_OF_REQ			BIT(29)
 #define CESA_TDMA_BREAK_CHAIN			BIT(28)
-#define CESA_TDMA_TYPE_MSK			GENMASK(27, 0)
+#define CESA_TDMA_SET_STATE			BIT(27)
+#define CESA_TDMA_TYPE_MSK			GENMASK(26, 0)
 #define CESA_TDMA_DUMMY				0
 #define CESA_TDMA_DATA				1
 #define CESA_TDMA_OP				2
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 2a9260559654..585c90f9f606 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -281,13 +281,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req)
 	sreq->offset = 0;
 }
 
+static void mv_cesa_ahash_dma_step(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_req *base = &creq->base;
+
+	/* We must explicitly set the digest state. */
+	if (base->chain.first->flags & CESA_TDMA_SET_STATE) {
+		struct mv_cesa_engine *engine = base->engine;
+		int i;
+
+		/* Set the hash state in the IVDIG regs. */
+		for (i = 0; i < ARRAY_SIZE(creq->state); i++)
+			writel_relaxed(creq->state[i], engine->regs +
+				       CESA_IVDIG(i));
+	}
+
+	mv_cesa_dma_step(base);
+}
+
 static void mv_cesa_ahash_step(struct crypto_async_request *req)
 {
 	struct ahash_request *ahashreq = ahash_request_cast(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq);
 
 	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ)
-		mv_cesa_dma_step(&creq->base);
+		mv_cesa_ahash_dma_step(ahashreq);
 	else
 		mv_cesa_ahash_std_step(ahashreq);
 }
@@ -585,12 +604,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	struct mv_cesa_ahash_dma_iter iter;
 	struct mv_cesa_op_ctx *op = NULL;
 	unsigned int frag_len;
+	bool set_state = false;
 	int ret;
 	u32 type;
 
 	basereq->chain.first = NULL;
 	basereq->chain.last = NULL;
 
+	if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl))
+		set_state = true;
+
 	if (creq->src_nents) {
 		ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents,
 				 DMA_TO_DEVICE);
@@ -684,6 +707,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	if (type != CESA_TDMA_RESULT)
 		basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
 
+	if (set_state) {
+		/*
+		 * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to
+		 * let the step logic know that the IVDIG registers should be
+		 * explicitly set before launching a TDMA chain.
+		 */
+		basereq->chain.first->flags |= CESA_TDMA_SET_STATE;
+	}
+
 	return 0;
 
 err_free_tdma:
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
index 4416b88eca70..c76375ff376d 100644
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/tdma.c
@@ -109,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine,
 		last->next = dreq->chain.first;
 		engine->chain.last = dreq->chain.last;
 
-		if (!(last->flags & CESA_TDMA_BREAK_CHAIN))
+		/*
+		 * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on
+		 * the last element of the current chain, or if the request
+		 * being queued needs the IV regs to be set before lauching
+		 * the request.
+		 */
+		if (!(last->flags & CESA_TDMA_BREAK_CHAIN) &&
+		    !(dreq->chain.first->flags & CESA_TDMA_SET_STATE))
 			last->next_dma = dreq->chain.first->cur_dma;
 	}
 }
-- 
cgit v1.2.3


From c8b1b3dd89ea7b3f77a73e59c4c4495e16338e15 Mon Sep 17 00:00:00 2001
From: Brendan McGrath <redmcg@redmandi.dyndns.org>
Date: Sat, 10 Dec 2016 21:20:42 +1100
Subject: HID: asus: Fix keyboard support

The previous submission which added Touchpad support broke the
Keyboard support of this driver. This patch:
1. fixes the Keyboard support (by assigning drvdata->input);
2. renames NOTEBOOK_QUIRKS to KEYBOARD_QUIRKS;
3. adds the NO_INIT_REPORT quirk to the KEYBOARD_QUIRKS; and
4. sets the input->name to 'Asus Keyboard' for the keyboard

Signed-off-by: Brendan McGrath <redmcg@redmandi.dyndns.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-asus.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index d40ed9fdf68d..70b12f89a193 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -64,7 +64,8 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
 #define QUIRK_SKIP_INPUT_MAPPING	BIT(2)
 #define QUIRK_IS_MULTITOUCH		BIT(3)
 
-#define NOTEBOOK_QUIRKS			QUIRK_FIX_NOTEBOOK_REPORT
+#define KEYBOARD_QUIRKS			(QUIRK_FIX_NOTEBOOK_REPORT | \
+						 QUIRK_NO_INIT_REPORTS)
 #define TOUCHPAD_QUIRKS			(QUIRK_NO_INIT_REPORTS | \
 						 QUIRK_SKIP_INPUT_MAPPING | \
 						 QUIRK_IS_MULTITOUCH)
@@ -170,11 +171,11 @@ static int asus_raw_event(struct hid_device *hdev,
 
 static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi)
 {
+	struct input_dev *input = hi->input;
 	struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
 
 	if (drvdata->quirks & QUIRK_IS_MULTITOUCH) {
 		int ret;
-		struct input_dev *input = hi->input;
 
 		input_set_abs_params(input, ABS_MT_POSITION_X, 0, MAX_X, 0, 0);
 		input_set_abs_params(input, ABS_MT_POSITION_Y, 0, MAX_Y, 0, 0);
@@ -191,10 +192,10 @@ static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi)
 			hid_err(hdev, "Asus input mt init slots failed: %d\n", ret);
 			return ret;
 		}
-
-		drvdata->input = input;
 	}
 
+	drvdata->input = input;
+
 	return 0;
 }
 
@@ -286,7 +287,11 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_stop_hw;
 	}
 
-	drvdata->input->name = "Asus TouchPad";
+	if (drvdata->quirks & QUIRK_IS_MULTITOUCH) {
+		drvdata->input->name = "Asus TouchPad";
+	} else {
+		drvdata->input->name = "Asus Keyboard";
+	}
 
 	if (drvdata->quirks & QUIRK_IS_MULTITOUCH) {
 		ret = asus_start_multitouch(hdev);
@@ -315,7 +320,7 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 
 static const struct hid_device_id asus_devices[] = {
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK,
-		 USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), NOTEBOOK_QUIRKS},
+		 USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), KEYBOARD_QUIRKS},
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK,
 			 USB_DEVICE_ID_ASUSTEK_TOUCHPAD), TOUCHPAD_QUIRKS },
 	{ }
-- 
cgit v1.2.3


From 2b6579d4a71afb19c6583470783371b992944f67 Mon Sep 17 00:00:00 2001
From: Roderick Colenbrander <roderick.colenbrander@sony.com>
Date: Thu, 8 Dec 2016 19:09:50 -0800
Subject: HID: sony: Fix error handling bug when touchpad registration fails

The error handling code in sony_input_configured in general uses goto
based cleanup. Recently we migrated code from sony_probe to here, but
we didn't update the existing touchpad registration code, which was
already here to use the goto.

Signed-off-by: Roderick Colenbrander <roderick.colenbrander@sony.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sony.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 7687c0875395..f68c921af55e 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2548,7 +2548,7 @@ static int sony_input_configured(struct hid_device *hdev,
 			hid_err(sc->hdev,
 			"Unable to initialize multi-touch slots: %d\n",
 			ret);
-			return ret;
+			goto err_stop;
 		}
 
 		sony_init_output_report(sc, dualshock4_send_output_report);
-- 
cgit v1.2.3


From c70d5f70ccbbdf56bb86adb42127db90d0c90976 Mon Sep 17 00:00:00 2001
From: Roderick Colenbrander <roderick.colenbrander@sony.com>
Date: Thu, 8 Dec 2016 19:09:51 -0800
Subject: HID: sony: Use DS4 MAC address as unique identifier on USB

The DS4 MAC address is reported as a unique identified when
using Bluetooth. For USB there is no unique identifier reported
yet, so use the MAC address.

Signed-off-by: Roderick Colenbrander <roderick.colenbrander@sony.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sony.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index f68c921af55e..c6982a29a56d 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2390,6 +2390,12 @@ static int sony_check_add(struct sony_sc *sc)
 		}
 
 		memcpy(sc->mac_address, &buf[1], sizeof(sc->mac_address));
+
+		snprintf(sc->hdev->uniq, sizeof(sc->hdev->uniq),
+			"%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
+			sc->mac_address[5], sc->mac_address[4],
+			sc->mac_address[3], sc->mac_address[2],
+			sc->mac_address[1], sc->mac_address[0]);
 	} else if ((sc->quirks & SIXAXIS_CONTROLLER_USB) ||
 			(sc->quirks & NAVIGATION_CONTROLLER_USB)) {
 		buf = kmalloc(SIXAXIS_REPORT_0xF2_SIZE, GFP_KERNEL);
-- 
cgit v1.2.3


From 405182c2459fe2de4a3994ef39e866993e0e61d1 Mon Sep 17 00:00:00 2001
From: Roderick Colenbrander <roderick.colenbrander@sony.com>
Date: Thu, 8 Dec 2016 19:09:52 -0800
Subject: HID: sony: Ignore DS4 dongle reports when no device is connected

When the DS4 dongle is connected, it always generates HID reports
even when no DS4 is paired to it. This patch adds logic to ignore
HID reports from the dongle if there is no DS4 currently attached.

Signed-off-by: Roderick Colenbrander <roderick.colenbrander@sony.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sony.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index c6982a29a56d..f405b07d0381 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -1099,8 +1099,11 @@ struct sony_sc {
 	u8 led_delay_on[MAX_LEDS];
 	u8 led_delay_off[MAX_LEDS];
 	u8 led_count;
+	bool ds4_dongle_connected;
 };
 
+static void sony_set_leds(struct sony_sc *sc);
+
 static inline void sony_schedule_work(struct sony_sc *sc)
 {
 	if (!sc->defer_initialization)
@@ -1430,6 +1433,31 @@ static int sony_raw_event(struct hid_device *hdev, struct hid_report *report,
 				return -EILSEQ;
 			}
 		}
+
+		/*
+		 * In the case of a DS4 USB dongle, bit[2] of byte 31 indicates
+		 * if a DS4 is actually connected (indicated by '0').
+		 * For non-dongle, this bit is always 0 (connected).
+		 */
+		if (sc->hdev->vendor == USB_VENDOR_ID_SONY &&
+		    sc->hdev->product == USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE) {
+			bool connected = (rd[31] & 0x04) ? false : true;
+
+			if (!sc->ds4_dongle_connected && connected) {
+				hid_info(sc->hdev, "DualShock 4 USB dongle: controller connected\n");
+				sony_set_leds(sc);
+				sc->ds4_dongle_connected = true;
+			} else if (sc->ds4_dongle_connected && !connected) {
+				hid_info(sc->hdev, "DualShock 4 USB dongle: controller disconnected\n");
+				sc->ds4_dongle_connected = false;
+				/* Return 0, so hidraw can get the report. */
+				return 0;
+			} else if (!sc->ds4_dongle_connected) {
+				/* Return 0, so hidraw can get the report. */
+				return 0;
+			}
+		}
+
 		dualshock4_parse_report(sc, rd, size);
 	}
 
-- 
cgit v1.2.3


From 8f2b468aadc81ca0fc78e41696b648e30d91ba5c Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 20 Dec 2016 07:27:59 +0100
Subject: s390/vtime: correct system time accounting

There is a slight misaccounting of system time in vtime_account_user.
This function is called once per HZ tick in interrupt context.
The irq_enter function already accounted the system time up to the
point of the irq_enter call. The system time from irq_enter until
vtime_account_user/do_account_vtime is reached is irq time but it
is accounted to the previous context.

Just drop the hardirq offset from arch/s390/kernel/vtime.c.

Reported-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/vtime.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 6b246aadf311..1b5c5ee9fc1b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -94,7 +94,7 @@ static void update_mt_scaling(void)
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
+static int do_account_vtime(struct task_struct *tsk)
 {
 	u64 timer, clock, user, system, steal;
 	u64 user_scaled, system_scaled;
@@ -138,7 +138,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	}
 	account_user_time(tsk, user);
 	tsk->utimescaled += user_scaled;
-	account_system_time(tsk, hardirq_offset, system);
+	account_system_time(tsk, 0, system);
 	tsk->stimescaled += system_scaled;
 
 	steal = S390_lowcore.steal_timer;
@@ -152,7 +152,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 
 void vtime_task_switch(struct task_struct *prev)
 {
-	do_account_vtime(prev, 0);
+	do_account_vtime(prev);
 	prev->thread.user_timer = S390_lowcore.user_timer;
 	prev->thread.system_timer = S390_lowcore.system_timer;
 	S390_lowcore.user_timer = current->thread.user_timer;
@@ -166,7 +166,7 @@ void vtime_task_switch(struct task_struct *prev)
  */
 void vtime_account_user(struct task_struct *tsk)
 {
-	if (do_account_vtime(tsk, HARDIRQ_OFFSET))
+	if (do_account_vtime(tsk))
 		virt_timer_expire();
 }
 
-- 
cgit v1.2.3


From cabab3f9f5ca077535080b3252e6168935b914af Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 20 Dec 2016 12:58:10 +0100
Subject: s390/kbuild: enable modversions for symbols exported from asm

s390 version of commit 334bb7738764 ("x86/kbuild: enable modversions
for symbols exported from asm") so we get also rid of all these
warnings:

WARNING: EXPORT symbol "_mcount" [vmlinux] version generation failed, symbol will not be versioned.
WARNING: EXPORT symbol "memcpy" [vmlinux] version generation failed, symbol will not be versioned.
WARNING: EXPORT symbol "memmove" [vmlinux] version generation failed, symbol will not be versioned.
WARNING: EXPORT symbol "memset" [vmlinux] version generation failed, symbol will not be versioned.
WARNING: EXPORT symbol "save_fpu_regs" [vmlinux] version generation failed, symbol will not be versioned.
WARNING: EXPORT symbol "sie64a" [vmlinux] version generation failed, symbol will not be versioned.
WARNING: EXPORT symbol "sie_exit" [vmlinux] version generation failed, symbol will not be versioned.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/asm-prototypes.h | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 arch/s390/include/asm/asm-prototypes.h

diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..2c3413b0ca52
--- /dev/null
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_S390_PROTOTYPES_H
+
+#include <linux/kvm_host.h>
+#include <linux/ftrace.h>
+#include <asm/fpu/api.h>
+#include <asm-generic/asm-prototypes.h>
+
+#endif /* _ASM_S390_PROTOTYPES_H */
-- 
cgit v1.2.3


From b508fc354f6d168ec18673d99d3bce9d6c1d9475 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 21 Dec 2016 11:20:45 +0100
Subject: nvme: update maintainers information

Switch the new, shared nvme git repository, which is co-maintained
by everyone involved with NVMe.

Also add the nvme_ioctl.h UAPI header to the files list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 MAINTAINERS | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f6eb97b35e0f..9b279928461d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8852,17 +8852,22 @@ F:	drivers/video/fbdev/nvidia/
 NVM EXPRESS DRIVER
 M:	Keith Busch <keith.busch@intel.com>
 M:	Jens Axboe <axboe@fb.com>
+M:	Christoph Hellwig <hch@lst.de>
+M:	Sagi Grimberg <sagi@grimberg.me>
 L:	linux-nvme@lists.infradead.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
-W:	https://kernel.googlesource.com/pub/scm/linux/kernel/git/axboe/linux-block/
+T:	git://git.infradead.org/nvme.git
+W:	http://git.infradead.org/nvme.git
 S:	Supported
 F:	drivers/nvme/host/
 F:	include/linux/nvme.h
+F:	include/uapi/linux/nvme_ioctl.h
 
 NVM EXPRESS TARGET DRIVER
 M:	Christoph Hellwig <hch@lst.de>
 M:	Sagi Grimberg <sagi@grimberg.me>
 L:	linux-nvme@lists.infradead.org
+T:	git://git.infradead.org/nvme.git
+W:	http://git.infradead.org/nvme.git
 S:	Supported
 F:	drivers/nvme/target/
 
-- 
cgit v1.2.3


From e6282aef7b89a11d26e731060c4409b7aac278bf Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 19 Dec 2016 11:37:50 -0500
Subject: nvme: simplify stripe quirk

Some OEMs believe they own the Identify Controller vendor specific
region and will repurpose it with their own values. While not common,
we can't rely on the PCI VID:DID to tell use how to decode the field
we reserved for this as the stripe size so we need to do something else
for the list of devices using this quirk.

The field was supposed to allow flexibility on the device's back-end
striping, but it turned out that never materialized; the chunk is always
the same as MDTS in the products subscribing to this quirk, so this
patch removes the stripe_size field and sets the chunk to the max hw
transfer size for the devices using this quirk.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 17 ++---------------
 drivers/nvme/host/nvme.h |  1 -
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b40cfb076f02..2fc86dc7a8df 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1193,8 +1193,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
 	}
-	if (ctrl->stripe_size)
-		blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
+	if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+		blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
 	blk_queue_virt_boundary(q, ctrl->page_size - 1);
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
 		vwc = true;
@@ -1250,19 +1250,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->max_hw_sectors =
 		min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
 
-	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
-		unsigned int max_hw_sectors;
-
-		ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
-		max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
-		if (ctrl->max_hw_sectors) {
-			ctrl->max_hw_sectors = min(max_hw_sectors,
-							ctrl->max_hw_sectors);
-		} else {
-			ctrl->max_hw_sectors = max_hw_sectors;
-		}
-	}
-
 	nvme_set_queue_limits(ctrl, ctrl->admin_q);
 	ctrl->sgls = le32_to_cpu(id->sgls);
 	ctrl->kas = le16_to_cpu(id->kas);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bd5321441d12..6377e14586dc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -135,7 +135,6 @@ struct nvme_ctrl {
 
 	u32 page_size;
 	u32 max_hw_sectors;
-	u32 stripe_size;
 	u16 oncs;
 	u16 vid;
 	atomic_t abort_limit;
-- 
cgit v1.2.3


From 9fa196e7fc7a0f12329d5346164abb27f026991c Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 19 Dec 2016 16:18:24 +0200
Subject: nvme/pci: Fix whitespace problem

Convert to tabs and remove unneeded whitespaces.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2fd7dc2e8fc4..7d6c87028568 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -50,7 +50,7 @@
 #define NVME_AQ_DEPTH		256
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
-		
+
 /*
  * We handle AEN commands ourselves and don't even let the
  * block layer know about them.
@@ -1909,10 +1909,10 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (!dev->bar)
 		goto release;
 
-       return 0;
+	return 0;
   release:
-       pci_release_mem_regions(pdev);
-       return -ENODEV;
+	pci_release_mem_regions(pdev);
+	return -ENODEV;
 }
 
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-- 
cgit v1.2.3


From ff13b39ecf726715a96fcd3c23e50eb792ef6516 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 9 Dec 2016 12:08:58 -0500
Subject: nvme/pci: Delete misleading queue-wrap comment

It is not theoretically possible for this driver to wrap twice while
processing completions. The driver allocates only 'queue_depth - 1'
tags, so there can never be more than that to reap when processing a
completion queue. Removing this misleading comment makes it a little
less likely people with broken controllers will blame the driver for
their spurious interrupts.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7d6c87028568..151ce59f4ffb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -712,15 +712,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
 		nvme_req(req)->result = cqe.result;
 		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
-
 	}
 
-	/* If the controller ignores the cq head doorbell and continuously
-	 * writes to the queue, it is theoretically possible to wrap around
-	 * the queue twice and mistakenly return IRQ_NONE.  Linux only
-	 * requires that 0.1% of your interrupts are handled, so this isn't
-	 * a big problem.
-	 */
 	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
 		return;
 
-- 
cgit v1.2.3


From 2c473a9d02fbe881506d5d43bc7edb776f2f46f5 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 6 Dec 2016 10:14:35 -0800
Subject: nvme/scsi: Remove START STOP emulation

Now that the broken power state control is gone, it appears to serve
no purpose.  Just delete it.  NVME devices don't have a concept of
started vs stopped anyway.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/scsi.c | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index b71e95044b43..a5c09e703bd8 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -2160,30 +2160,6 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
 	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
-static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	u8 immed, no_flush;
-
-	immed = cmd[1] & 0x01;
-	no_flush = cmd[4] & 0x04;
-
-	if (immed != 0) {
-		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	} else {
-		if (no_flush == 0) {
-			/* Issue NVME FLUSH command prior to START STOP UNIT */
-			int res = nvme_trans_synchronize_cache(ns, hdr);
-			if (res)
-				return res;
-		}
-
-		return 0;
-	}
-}
-
 static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
@@ -2439,9 +2415,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
 	case SECURITY_PROTOCOL_OUT:
 		retcode = nvme_trans_security_protocol(ns, hdr, cmd);
 		break;
-	case START_STOP:
-		retcode = nvme_trans_start_stop(ns, hdr, cmd);
-		break;
 	case SYNCHRONIZE_CACHE:
 		retcode = nvme_trans_synchronize_cache(ns, hdr);
 		break;
-- 
cgit v1.2.3


From c703489885218900579279cec4b4ab8e7fce383b Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Tue, 20 Dec 2016 11:06:08 -0800
Subject: nvme/fc: correct some printk information

Dan Carpenters's tool caught a pointer reference - should have been
just ptr, not &ptr.

Don't bother. Remove the pointer value in the printf. Its irrelevant.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 771e2e761872..827c2b57e5bb 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2401,8 +2401,8 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	WARN_ON_ONCE(!changed);
 
 	dev_info(ctrl->ctrl.device,
-		"NVME-FC{%d}: new ctrl: NQN \"%s\" (%p)\n",
-		ctrl->cnum, ctrl->ctrl.opts->subsysnqn, &ctrl);
+		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
+		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
 
 	kref_get(&ctrl->ctrl.kref);
 
-- 
cgit v1.2.3


From 17a1ec08ce7074f05795e5c32a3e5bc9a797bbf8 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 15 Dec 2016 14:20:48 +0100
Subject: nvme/fc: simplify error handling of nvme_fc_create_hw_io_queues

Simplify the error handling of nvme_fc_create_hw_io_queues(), this saves us
one variable and one level of indentation.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviwed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 827c2b57e5bb..aa0bc60810a7 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1491,19 +1491,20 @@ static int
 nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 {
 	struct nvme_fc_queue *queue = &ctrl->queues[1];
-	int i, j, ret;
+	int i, ret;
 
 	for (i = 1; i < ctrl->queue_count; i++, queue++) {
 		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
-		if (ret) {
-			for (j = i-1; j >= 0; j--)
-				__nvme_fc_delete_hw_queue(ctrl,
-						&ctrl->queues[j], j);
-			return ret;
-		}
+		if (ret)
+			goto delete_queues;
 	}
 
 	return 0;
+
+delete_queues:
+	for (; i >= 0; i--)
+		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
+	return ret;
 }
 
 static int
-- 
cgit v1.2.3


From 6c73f949300f17851f53fa80c9d1611ccd6909d3 Mon Sep 17 00:00:00 2001
From: Daniel Verkamp <daniel.verkamp@intel.com>
Date: Fri, 9 Dec 2016 12:59:46 -0700
Subject: nvmet: fix KATO offset in Set Features

The Set Features implementation for Keep Alive Timer was using the wrong
structure when retrieving the KATO value; it was treating the Set
Features command as a Property Set command.

The NVMe spec defines the Keep Alive Timer feature as having one input
in CDW11 (4 bytes at offset 44 in the command) whereas the code was
reading 8 bytes at offset 48.

Since the Linux NVMe over Fabrics host never sets this feature, this
code has presumably never been tested.

Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ec1ad2aa0a4c..95ae52390478 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -382,7 +382,6 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
 {
 	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
 	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
-	u64 val;
 	u32 val32;
 	u16 status = 0;
 
@@ -392,8 +391,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
 			(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
 		break;
 	case NVME_FEAT_KATO:
-		val = le64_to_cpu(req->cmd->prop_set.value);
-		val32 = val & 0xffff;
+		val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
 		req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
 		nvmet_set_result(req, req->sq->ctrl->kato);
 		break;
-- 
cgit v1.2.3


From 7c3a23b85cac5f3caa531f369c1e3a5f1a8b555f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Dec 2016 14:59:47 +0000
Subject: nvmet/fcloop: remove some logically dead code performing redundant
 ret checks

The check to see if ret is non-zero and return this rather than count
is redundant in two occassions.  It is redundant because prior to this
check, the return code ret is already checked for a non-zero error
return value and we return from the function at that point.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fcloop.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index bcb8ebeb01c5..4e8e6a22bce1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -845,7 +845,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
 	rport->lport = nport->lport;
 	nport->rport = rport;
 
-	return ret ? ret : count;
+	return count;
 }
 
 
@@ -952,7 +952,7 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
 	tport->lport = nport->lport;
 	nport->tport = tport;
 
-	return ret ? ret : count;
+	return count;
 }
 
 
-- 
cgit v1.2.3


From 64d656a162d7ba49d6d1863e41407b0f95e19258 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Dec 2016 19:20:45 +0100
Subject: block: add back plugging in __blkdev_direct_IO

This allows sending larger than 1 MB requests to devices that support
large I/O sizes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/block_dev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7c4507224ed6..206a92aab52e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -328,6 +328,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = bdev_file_inode(file);
 	struct block_device *bdev = I_BDEV(inode);
+	struct blk_plug plug;
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	bool is_read = (iov_iter_rw(iter) == READ);
@@ -353,6 +354,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	dio->multi_bio = false;
 	dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
 
+	blk_start_plug(&plug);
 	for (;;) {
 		bio->bi_bdev = bdev;
 		bio->bi_iter.bi_sector = pos >> 9;
@@ -394,6 +396,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		submit_bio(bio);
 		bio = bio_alloc(GFP_KERNEL, nr_pages);
 	}
+	blk_finish_plug(&plug);
 
 	if (!dio->is_sync)
 		return -EIOCBQUEUED;
-- 
cgit v1.2.3


From 72c5296f9d64d8f5f27c2133e5f108a45a353d71 Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Tue, 20 Dec 2016 14:38:14 -0700
Subject: genhd: remove dead and duplicated scsi code

blk_scsi_cmd_filter use was deprecated by 4beab5c6 and the SCSI macros
are duplicated in blkdev.h, both likely reintroduced by a bad merge from
540eed56.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/genhd.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e0341af6950e..76f39754e7b0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -146,15 +146,6 @@ enum {
 	DISK_EVENT_EJECT_REQUEST		= 1 << 1, /* eject requested */
 };
 
-#define BLK_SCSI_MAX_CMDS	(256)
-#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-
-struct blk_scsi_cmd_filter {
-	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
-	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
-	struct kobject kobj;
-};
-
 struct disk_part_tbl {
 	struct rcu_head rcu_head;
 	int len;
-- 
cgit v1.2.3


From e568df6b84ff05a22467503afc11bee7a6ba0700 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 10 Aug 2016 16:42:53 +0200
Subject: ext2: Return BH_New buffers for zeroed blocks

So far we did not return BH_New buffers from ext2_get_blocks() when we
allocated and zeroed-out a block for DAX inode to avoid racy zeroing in
DAX code. This zeroing is gone these days so we can remove the
workaround.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/ext2/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0093ea2512a8..f073bfca694b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
 		}
-	} else {
-		*new = true;
 	}
+	*new = true;
 
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
-- 
cgit v1.2.3


From c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 10 Aug 2016 17:22:44 +0200
Subject: mm: Invalidate DAX radix tree entries only if appropriate

Currently invalidate_inode_pages2_range() and invalidate_mapping_pages()
just delete all exceptional radix tree entries they find. For DAX this
is not desirable as we track cache dirtiness in these entries and when
they are evicted, we may not flush caches although it is necessary. This
can for example manifest when we write to the same block both via mmap
and via write(2) (to different offsets) and fsync(2) then does not
properly flush CPU caches when modification via write(2) was the last
one.

Create appropriate DAX functions to handle invalidation of DAX entries
for invalidate_inode_pages2_range() and invalidate_mapping_pages() and
wire them up into the corresponding mm functions.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c            | 71 +++++++++++++++++++++++++++++++++++++++++++-------
 include/linux/dax.h |  3 +++
 mm/truncate.c       | 75 +++++++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 125 insertions(+), 24 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index a8732fbed381..bcfedd184860 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
+static int __dax_invalidate_mapping_entry(struct address_space *mapping,
+					  pgoff_t index, bool trunc)
+{
+	int ret = 0;
+	void *entry;
+	struct radix_tree_root *page_tree = &mapping->page_tree;
+
+	spin_lock_irq(&mapping->tree_lock);
+	entry = get_unlocked_mapping_entry(mapping, index, NULL);
+	if (!entry || !radix_tree_exceptional_entry(entry))
+		goto out;
+	if (!trunc &&
+	    (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
+	     radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+		goto out;
+	radix_tree_delete(page_tree, index);
+	mapping->nrexceptional--;
+	ret = 1;
+out:
+	put_unlocked_mapping_entry(mapping, index, entry);
+	spin_unlock_irq(&mapping->tree_lock);
+	return ret;
+}
 /*
  * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
  * entry to get unlocked before deleting it.
  */
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
-	void *entry;
+	int ret = __dax_invalidate_mapping_entry(mapping, index, true);
 
-	spin_lock_irq(&mapping->tree_lock);
-	entry = get_unlocked_mapping_entry(mapping, index, NULL);
 	/*
 	 * This gets called from truncate / punch_hole path. As such, the caller
 	 * must hold locks protecting against concurrent modifications of the
@@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	 * caller has seen exceptional entry for this index, we better find it
 	 * at that index as well...
 	 */
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) {
-		spin_unlock_irq(&mapping->tree_lock);
-		return 0;
-	}
-	radix_tree_delete(&mapping->page_tree, index);
+	WARN_ON_ONCE(!ret);
+	return ret;
+}
+
+/*
+ * Invalidate exceptional DAX entry if easily possible. This handles DAX
+ * entries for invalidate_inode_pages() so we evict the entry only if we can
+ * do so without blocking.
+ */
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
+{
+	int ret = 0;
+	void *entry, **slot;
+	struct radix_tree_root *page_tree = &mapping->page_tree;
+
+	spin_lock_irq(&mapping->tree_lock);
+	entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
+	if (!entry || !radix_tree_exceptional_entry(entry) ||
+	    slot_locked(mapping, slot))
+		goto out;
+	if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
+	    radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+		goto out;
+	radix_tree_delete(page_tree, index);
 	mapping->nrexceptional--;
+	ret = 1;
+out:
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+	if (ret)
+		dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+	return ret;
+}
 
-	return 1;
+/*
+ * Invalidate exceptional DAX entry if it is clean.
+ */
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+				      pgoff_t index)
+{
+	return __dax_invalidate_mapping_entry(mapping, index, false);
 }
 
 /*
diff --git a/include/linux/dax.h b/include/linux/dax.h
index f97bcfe79472..24ad71173995 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+				      pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all);
 
diff --git a/mm/truncate.c b/mm/truncate.c
index fd97f1dbce29..dd7b24e083c5 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -24,20 +24,12 @@
 #include <linux/rmap.h>
 #include "internal.h"
 
-static void clear_exceptional_entry(struct address_space *mapping,
-				    pgoff_t index, void *entry)
+static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
+			       void *entry)
 {
 	struct radix_tree_node *node;
 	void **slot;
 
-	/* Handled by shmem itself */
-	if (shmem_mapping(mapping))
-		return;
-
-	if (dax_mapping(mapping)) {
-		dax_delete_mapping_entry(mapping, index);
-		return;
-	}
 	spin_lock_irq(&mapping->tree_lock);
 	/*
 	 * Regular page slots are stabilized by the page lock even
@@ -55,6 +47,56 @@ unlock:
 	spin_unlock_irq(&mapping->tree_lock);
 }
 
+/*
+ * Unconditionally remove exceptional entry. Usually called from truncate path.
+ */
+static void truncate_exceptional_entry(struct address_space *mapping,
+				       pgoff_t index, void *entry)
+{
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return;
+
+	if (dax_mapping(mapping)) {
+		dax_delete_mapping_entry(mapping, index);
+		return;
+	}
+	clear_shadow_entry(mapping, index, entry);
+}
+
+/*
+ * Invalidate exceptional entry if easily possible. This handles exceptional
+ * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
+ * clean entries.
+ */
+static int invalidate_exceptional_entry(struct address_space *mapping,
+					pgoff_t index, void *entry)
+{
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return 1;
+	if (dax_mapping(mapping))
+		return dax_invalidate_mapping_entry(mapping, index);
+	clear_shadow_entry(mapping, index, entry);
+	return 1;
+}
+
+/*
+ * Invalidate exceptional entry if clean. This handles exceptional entries for
+ * invalidate_inode_pages2() so for DAX it evicts only clean entries.
+ */
+static int invalidate_exceptional_entry2(struct address_space *mapping,
+					 pgoff_t index, void *entry)
+{
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return 1;
+	if (dax_mapping(mapping))
+		return dax_invalidate_mapping_entry_sync(mapping, index);
+	clear_shadow_entry(mapping, index, entry);
+	return 1;
+}
+
 /**
  * do_invalidatepage - invalidate part or all of a page
  * @page: the page which is affected
@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				break;
 
 			if (radix_tree_exceptional_entry(page)) {
-				clear_exceptional_entry(mapping, index, page);
+				truncate_exceptional_entry(mapping, index,
+							   page);
 				continue;
 			}
 
@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			}
 
 			if (radix_tree_exceptional_entry(page)) {
-				clear_exceptional_entry(mapping, index, page);
+				truncate_exceptional_entry(mapping, index,
+							   page);
 				continue;
 			}
 
@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 				break;
 
 			if (radix_tree_exceptional_entry(page)) {
-				clear_exceptional_entry(mapping, index, page);
+				invalidate_exceptional_entry(mapping, index,
+							     page);
 				continue;
 			}
 
@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				break;
 
 			if (radix_tree_exceptional_entry(page)) {
-				clear_exceptional_entry(mapping, index, page);
+				if (!invalidate_exceptional_entry2(mapping,
+								   index, page))
+					ret = -EBUSY;
 				continue;
 			}
 
-- 
cgit v1.2.3


From e3fce68cdbed297d927e993b3ea7b8b1cee545da Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 10 Aug 2016 17:10:28 +0200
Subject: dax: Avoid page invalidation races and unnecessary radix tree
 traversals

Currently dax_iomap_rw() takes care of invalidating page tables and
evicting hole pages from the radix tree when write(2) to the file
happens. This invalidation is only necessary when there is some block
allocation resulting from write(2). Furthermore in current place the
invalidation is racy wrt page fault instantiating a hole page just after
we have invalidated it.

So perform the page invalidation inside dax_iomap_actor() where we can
do it only when really necessary and after blocks have been allocated so
nobody will be instantiating new hole pages anymore.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index bcfedd184860..08e15db28b79 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -985,6 +985,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
 		return -EIO;
 
+	/*
+	 * Write can allocate block for an area which has a hole page mapped
+	 * into page tables. We have to tear down these mappings so that data
+	 * written by write(2) is visible in mmap.
+	 */
+	if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+		invalidate_inode_pages2_range(inode->i_mapping,
+					      pos >> PAGE_SHIFT,
+					      (end - 1) >> PAGE_SHIFT);
+	}
+
 	while (pos < end) {
 		unsigned offset = pos & (PAGE_SIZE - 1);
 		struct blk_dax_ctl dax = { 0 };
@@ -1043,23 +1054,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (iov_iter_rw(iter) == WRITE)
 		flags |= IOMAP_WRITE;
 
-	/*
-	 * Yes, even DAX files can have page cache attached to them:  A zeroed
-	 * page is inserted into the pagecache when we have to serve a write
-	 * fault on a hole.  It should never be dirtied and can simply be
-	 * dropped from the pagecache once we get real data for the page.
-	 *
-	 * XXX: This is racy against mmap, and there's nothing we can do about
-	 * it. We'll eventually need to shift this down even further so that
-	 * we can check if we allocated blocks over a hole first.
-	 */
-	if (mapping->nrpages) {
-		ret = invalidate_inode_pages2_range(mapping,
-				pos >> PAGE_SHIFT,
-				(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
-
 	while (iov_iter_count(iter)) {
 		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
 				iter, dax_iomap_actor);
-- 
cgit v1.2.3


From f449b936f1aff7696b24a338f493d5cee8d48d55 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 19 Oct 2016 14:48:38 +0200
Subject: dax: Finish fault completely when loading holes

The only case when we do not finish the page fault completely is when we
are loading hole pages into a radix tree. Avoid this special case and
finish the fault in that case as well inside the DAX fault handler. It
will allow us for easier iomap handling.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 08e15db28b79..bfec6f2ef613 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -539,15 +539,16 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
  * otherwise it will simply fall out of the page cache under memory
  * pressure without ever having been dirtied.
  */
-static int dax_load_hole(struct address_space *mapping, void *entry,
+static int dax_load_hole(struct address_space *mapping, void **entry,
 			 struct vm_fault *vmf)
 {
 	struct page *page;
+	int ret;
 
 	/* Hole page already exists? Return it...  */
-	if (!radix_tree_exceptional_entry(entry)) {
-		vmf->page = entry;
-		return VM_FAULT_LOCKED;
+	if (!radix_tree_exceptional_entry(*entry)) {
+		page = *entry;
+		goto out;
 	}
 
 	/* This will replace locked radix tree entry with a hole page */
@@ -555,8 +556,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 				   vmf->gfp_mask | __GFP_ZERO);
 	if (!page)
 		return VM_FAULT_OOM;
+ out:
 	vmf->page = page;
-	return VM_FAULT_LOCKED;
+	ret = finish_fault(vmf);
+	vmf->page = NULL;
+	*entry = page;
+	if (!ret) {
+		/* Grab reference for PTE that is now referencing the page */
+		get_page(page);
+		return VM_FAULT_NOPAGE;
+	}
+	return ret;
 }
 
 static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
@@ -1163,8 +1173,8 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
-			vmf_ret = dax_load_hole(mapping, entry, vmf);
-			break;
+			vmf_ret = dax_load_hole(mapping, &entry, vmf);
+			goto finish_iomap;
 		}
 		/*FALLTHRU*/
 	default:
@@ -1185,8 +1195,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 	}
  unlock_entry:
-	if (vmf_ret != VM_FAULT_LOCKED || error)
-		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
 	if (error == -ENOMEM)
 		return VM_FAULT_OOM | major;
-- 
cgit v1.2.3


From 9f141d6ef6258a3a37a045842d9ba7e68f368956 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 19 Oct 2016 14:34:31 +0200
Subject: dax: Call ->iomap_begin without entry lock during dax fault

Currently ->iomap_begin() handler is called with entry lock held. If the
filesystem held any locks between ->iomap_begin() and ->iomap_end()
(such as ext4 which will want to hold transaction open), this would cause
lock inversion with the iomap_apply() from standard IO path which first
calls ->iomap_begin() and only then calls ->actor() callback which grabs
entry locks for DAX (if it faults when copying from/to user provided
buffers).

Fix the problem by nesting grabbing of entry lock inside ->iomap_begin()
- ->iomap_end() pair.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 121 ++++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 66 insertions(+), 55 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index bfec6f2ef613..5c74f60d0a50 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1078,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_rw);
 
+static int dax_fault_return(int error)
+{
+	if (error == 0)
+		return VM_FAULT_NOPAGE;
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM;
+	return VM_FAULT_SIGBUS;
+}
+
 /**
  * dax_iomap_fault - handle a page fault on a DAX file
  * @vma: The virtual memory area where the fault occurred
@@ -1110,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (pos >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
 
-	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-	if (IS_ERR(entry)) {
-		error = PTR_ERR(entry);
-		goto out;
-	}
-
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
 		flags |= IOMAP_WRITE;
 
@@ -1126,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	 */
 	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
 	if (error)
-		goto unlock_entry;
+		return dax_fault_return(error);
 	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-		error = -EIO;		/* fs corruption? */
+		vmf_ret = dax_fault_return(-EIO);	/* fs corruption? */
+		goto finish_iomap;
+	}
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+	if (IS_ERR(entry)) {
+		vmf_ret = dax_fault_return(PTR_ERR(entry));
 		goto finish_iomap;
 	}
 
@@ -1151,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 
 		if (error)
-			goto finish_iomap;
+			goto error_unlock_entry;
 
 		__SetPageUptodate(vmf->cow_page);
 		vmf_ret = finish_fault(vmf);
 		if (!vmf_ret)
 			vmf_ret = VM_FAULT_DONE_COW;
-		goto finish_iomap;
+		goto unlock_entry;
 	}
 
 	switch (iomap.type) {
@@ -1169,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 		error = dax_insert_mapping(mapping, iomap.bdev, sector,
 				PAGE_SIZE, &entry, vma, vmf);
+		/* -EBUSY is fine, somebody else faulted on the same PTE */
+		if (error == -EBUSY)
+			error = 0;
 		break;
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
 			vmf_ret = dax_load_hole(mapping, &entry, vmf);
-			goto finish_iomap;
+			goto unlock_entry;
 		}
 		/*FALLTHRU*/
 	default:
@@ -1183,30 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		break;
 	}
 
- finish_iomap:
-	if (ops->iomap_end) {
-		if (error || (vmf_ret & VM_FAULT_ERROR)) {
-			/* keep previous error */
-			ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
-					&iomap);
-		} else {
-			error = ops->iomap_end(inode, pos, PAGE_SIZE,
-					PAGE_SIZE, flags, &iomap);
-		}
-	}
+ error_unlock_entry:
+	vmf_ret = dax_fault_return(error) | major;
  unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
- out:
-	if (error == -ENOMEM)
-		return VM_FAULT_OOM | major;
-	/* -EBUSY is fine, somebody else faulted on the same PTE */
-	if (error < 0 && error != -EBUSY)
-		return VM_FAULT_SIGBUS | major;
-	if (vmf_ret) {
-		WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
-		return vmf_ret;
+ finish_iomap:
+	if (ops->iomap_end) {
+		int copied = PAGE_SIZE;
+
+		if (vmf_ret & VM_FAULT_ERROR)
+			copied = 0;
+		/*
+		 * The fault is done by now and there's no way back (other
+		 * thread may be already happily using PTE we have installed).
+		 * Just ignore error from ->iomap_end since we cannot do much
+		 * with it.
+		 */
+		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
 	}
-	return VM_FAULT_NOPAGE | major;
+	return vmf_ret;
 }
 EXPORT_SYMBOL_GPL(dax_iomap_fault);
 
@@ -1330,16 +1337,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
 		goto fallback;
 
-	/*
-	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-	 * PMD or a HZP entry.  If it can't (because a 4k page is already in
-	 * the tree, for instance), it will return -EEXIST and we just fall
-	 * back to 4k entries.
-	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-	if (IS_ERR(entry))
-		goto fallback;
-
 	/*
 	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
 	 * setting up a mapping, so really we're using iomap_begin() as a way
@@ -1348,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	pos = (loff_t)pgoff << PAGE_SHIFT;
 	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
 	if (error)
-		goto unlock_entry;
+		goto fallback;
+
 	if (iomap.offset + iomap.length < pos + PMD_SIZE)
 		goto finish_iomap;
 
+	/*
+	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+	 * PMD or a HZP entry.  If it can't (because a 4k page is already in
+	 * the tree, for instance), it will return -EEXIST and we just fall
+	 * back to 4k entries.
+	 */
+	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	if (IS_ERR(entry))
+		goto finish_iomap;
+
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
 	vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
@@ -1364,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (WARN_ON_ONCE(write))
-			goto finish_iomap;
+			goto unlock_entry;
 		result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
 				&entry);
 		break;
@@ -1373,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		break;
 	}
 
+ unlock_entry:
+	put_locked_mapping_entry(mapping, pgoff, entry);
  finish_iomap:
 	if (ops->iomap_end) {
-		if (result == VM_FAULT_FALLBACK) {
-			ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
-					&iomap);
-		} else {
-			error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
-					iomap_flags, &iomap);
-			if (error)
-				result = VM_FAULT_FALLBACK;
-		}
+		int copied = PMD_SIZE;
+
+		if (result == VM_FAULT_FALLBACK)
+			copied = 0;
+		/*
+		 * The fault is done by now and there's no way back (other
+		 * thread may be already happily using PMD we have installed).
+		 * Just ignore error from ->iomap_end since we cannot do much
+		 * with it.
+		 */
+		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
+				&iomap);
 	}
- unlock_entry:
-	put_locked_mapping_entry(mapping, pgoff, entry);
  fallback:
 	if (result == VM_FAULT_FALLBACK) {
 		split_huge_pmd(vma, pmd, address);
-- 
cgit v1.2.3


From 1db175428ee374489448361213e9c3b749d14900 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 21 Oct 2016 11:33:49 +0200
Subject: ext4: Simplify DAX fault path

Now that dax_iomap_fault() calls ->iomap_begin() without entry lock, we
can use transaction starting in ext4_iomap_begin() and thus simplify
ext4_dax_fault(). It also provides us proper retries in case of ENOSPC.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/ext4/file.c | 48 ++++++++++--------------------------------------
 1 file changed, 10 insertions(+), 38 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b5f184493c57..d663d3d7c81c 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -258,7 +258,6 @@ out:
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	int result;
-	handle_t *handle = NULL;
 	struct inode *inode = file_inode(vma->vm_file);
 	struct super_block *sb = inode->i_sb;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-						EXT4_DATA_TRANS_BLOCKS(sb));
-	} else
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-
-	if (IS_ERR(handle))
-		result = VM_FAULT_SIGBUS;
-	else
-		result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
-
-	if (write) {
-		if (!IS_ERR(handle))
-			ext4_journal_stop(handle);
-		up_read(&EXT4_I(inode)->i_mmap_sem);
+	}
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	if (write)
 		sb_end_pagefault(sb);
-	} else
-		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
@@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 						pmd_t *pmd, unsigned int flags)
 {
 	int result;
-	handle_t *handle = NULL;
 	struct inode *inode = file_inode(vma->vm_file);
 	struct super_block *sb = inode->i_sb;
 	bool write = flags & FAULT_FLAG_WRITE;
@@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-				ext4_chunk_trans_blocks(inode,
-							PMD_SIZE / PAGE_SIZE));
-	} else
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-
-	if (IS_ERR(handle))
-		result = VM_FAULT_SIGBUS;
-	else {
-		result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
-					     &ext4_iomap_ops);
 	}
-
-	if (write) {
-		if (!IS_ERR(handle))
-			ext4_journal_stop(handle);
-		up_read(&EXT4_I(inode)->i_mmap_sem);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
+				     &ext4_iomap_ops);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	if (write)
 		sb_end_pagefault(sb);
-	} else
-		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
-- 
cgit v1.2.3


From 02608e02fbec04fccf2eb0cc8d8082f65c0a4286 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Wed, 21 Dec 2016 12:32:54 -0800
Subject: crypto: testmgr - Use heap buffer for acomp test input

Christopher Covington reported a crash on aarch64 on recent Fedora
kernels:

kernel BUG at ./include/linux/scatterlist.h:140!
Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
Modules linked in:
CPU: 2 PID: 752 Comm: cryptomgr_test Not tainted 4.9.0-11815-ge93b1cc #162
Hardware name: linux,dummy-virt (DT)
task: ffff80007c650080 task.stack: ffff800008910000
PC is at sg_init_one+0xa0/0xb8
LR is at sg_init_one+0x24/0xb8
...
[<ffff000008398db8>] sg_init_one+0xa0/0xb8
[<ffff000008350a44>] test_acomp+0x10c/0x438
[<ffff000008350e20>] alg_test_comp+0xb0/0x118
[<ffff00000834f28c>] alg_test+0x17c/0x2f0
[<ffff00000834c6a4>] cryptomgr_test+0x44/0x50
[<ffff0000080dac70>] kthread+0xf8/0x128
[<ffff000008082ec0>] ret_from_fork+0x10/0x50

The test vectors used for input are part of the kernel image. These
inputs are passed as a buffer to sg_init_one which eventually blows up
with BUG_ON(!virt_addr_valid(buf)). On arm64, virt_addr_valid returns
false for the kernel image since virt_to_page will not return the
correct page. Fix this by copying the input vectors to heap buffer
before setting up the scatterlist.

Reported-by: Christopher Covington <cov@codeaurora.org>
Fixes: d7db7a882deb ("crypto: acomp - update testmgr with support for acomp")
Signed-off-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f616ad74cce7..44e888b0b041 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1461,16 +1461,25 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
 	for (i = 0; i < ctcount; i++) {
 		unsigned int dlen = COMP_BUF_SIZE;
 		int ilen = ctemplate[i].inlen;
+		void *input_vec;
 
+		input_vec = kmalloc(ilen, GFP_KERNEL);
+		if (!input_vec) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		memcpy(input_vec, ctemplate[i].input, ilen);
 		memset(output, 0, dlen);
 		init_completion(&result.completion);
-		sg_init_one(&src, ctemplate[i].input, ilen);
+		sg_init_one(&src, input_vec, ilen);
 		sg_init_one(&dst, output, dlen);
 
 		req = acomp_request_alloc(tfm);
 		if (!req) {
 			pr_err("alg: acomp: request alloc failed for %s\n",
 			       algo);
+			kfree(input_vec);
 			ret = -ENOMEM;
 			goto out;
 		}
@@ -1483,6 +1492,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
 		if (ret) {
 			pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n",
 			       i + 1, algo, -ret);
+			kfree(input_vec);
 			acomp_request_free(req);
 			goto out;
 		}
@@ -1491,6 +1501,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
 			pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n",
 			       i + 1, algo, req->dlen);
 			ret = -EINVAL;
+			kfree(input_vec);
 			acomp_request_free(req);
 			goto out;
 		}
@@ -1500,26 +1511,37 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
 			       i + 1, algo);
 			hexdump(output, req->dlen);
 			ret = -EINVAL;
+			kfree(input_vec);
 			acomp_request_free(req);
 			goto out;
 		}
 
+		kfree(input_vec);
 		acomp_request_free(req);
 	}
 
 	for (i = 0; i < dtcount; i++) {
 		unsigned int dlen = COMP_BUF_SIZE;
 		int ilen = dtemplate[i].inlen;
+		void *input_vec;
+
+		input_vec = kmalloc(ilen, GFP_KERNEL);
+		if (!input_vec) {
+			ret = -ENOMEM;
+			goto out;
+		}
 
+		memcpy(input_vec, dtemplate[i].input, ilen);
 		memset(output, 0, dlen);
 		init_completion(&result.completion);
-		sg_init_one(&src, dtemplate[i].input, ilen);
+		sg_init_one(&src, input_vec, ilen);
 		sg_init_one(&dst, output, dlen);
 
 		req = acomp_request_alloc(tfm);
 		if (!req) {
 			pr_err("alg: acomp: request alloc failed for %s\n",
 			       algo);
+			kfree(input_vec);
 			ret = -ENOMEM;
 			goto out;
 		}
@@ -1532,6 +1554,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
 		if (ret) {
 			pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n",
 			       i + 1, algo, -ret);
+			kfree(input_vec);
 			acomp_request_free(req);
 			goto out;
 		}
@@ -1540,6 +1563,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
 			pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n",
 			       i + 1, algo, req->dlen);
 			ret = -EINVAL;
+			kfree(input_vec);
 			acomp_request_free(req);
 			goto out;
 		}
@@ -1549,10 +1573,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
 			       i + 1, algo);
 			hexdump(output, req->dlen);
 			ret = -EINVAL;
+			kfree(input_vec);
 			acomp_request_free(req);
 			goto out;
 		}
 
+		kfree(input_vec);
 		acomp_request_free(req);
 	}
 
-- 
cgit v1.2.3


From 66115335fbb411365c23349b2fbe7e041eabbaf2 Mon Sep 17 00:00:00 2001
From: John Brooks <john@fastquake.com>
Date: Fri, 23 Dec 2016 00:53:10 +0000
Subject: docs: Fix build failure

The 80211.tmpl DocBook file was removed in commit 819bf593767c ("docs-rst:
sphinxify 802.11 documentation"), but the 80211.xml target was re-added to
the Makefile by commit 7ddedebb03b7 ("ALSA: doc: ReSTize
writing-an-alsa-driver document"), leading to a failure when building the
documentation:

*** No rule to make target 'Documentation/DocBook/80211.xml', needed by
'Documentation/DocBook/80211.aux.xml'.

cc: stable@vger.kernel.org
Signed-off-by: John Brooks <john@fastquake.com>
Mea-culpa-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/DocBook/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index c75e5d6b8fa8..a6eb7dcd4dd5 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml  \
 	    kernel-api.xml filesystems.xml lsm.xml kgdb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
-	    80211.xml sh.xml regulator.xml w1.xml \
+	    sh.xml regulator.xml w1.xml \
 	    writing_musb_glue_layer.xml iio.xml
 
 ifeq ($(DOCBOOKS),)
-- 
cgit v1.2.3


From 36f671be1db1b17d3d4ab0c8b47f81ccb1efcb75 Mon Sep 17 00:00:00 2001
From: Cihangir Akturk <cakturk@gmail.com>
Date: Sat, 17 Dec 2016 19:42:17 +0200
Subject: Documentation/unaligned-memory-access.txt: fix incorrect comparison
 operator

In the actual implementation ether_addr_equal function tests for equality to 0
when returning. It seems in commit 0d74c4 it is somehow overlooked to change
this operator to reflect the actual function.

Signed-off-by: Cihangir Akturk <cakturk@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/unaligned-memory-access.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
index a445da098bc6..3f76c0c37920 100644
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -151,7 +151,7 @@ bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
 #else
 	const u16 *a = (const u16 *)addr1;
 	const u16 *b = (const u16 *)addr2;
-	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
 #endif
 }
 
-- 
cgit v1.2.3


From fe4f6c801c03bc13113d0dc32f02d4ea8ed89ffd Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 27 Dec 2016 19:46:27 -0500
Subject: fscrypt: fix the test_dummy_encryption mount option

Commit f1c131b45410a: "crypto: xts - Convert to skcipher" now fails
the setkey operation if the AES key is the same as the tweak key.
Previously this check was only done if FIPS mode is enabled.  Now this
check is also done if weak key checking was requested.  This is
reasonable, but since we were using the dummy key which was a constant
series of 0x42 bytes, it now caused dummy encrpyption test mode to
fail.

Fix this by using 0x42... and 0x24... for the two keys, so they are
different.

Fixes: f1c131b45410a202eb45cc55980a7a9e4e4b4f40
Cc: stable@vger.kernel.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/keyinfo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 6eeea1dcba41..95cd4c3b06c3 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -248,7 +248,8 @@ retry:
 		goto out;
 
 	if (fscrypt_dummy_context_enabled(inode)) {
-		memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
+		memset(raw_key, 0x42, keysize/2);
+		memset(raw_key+keysize/2, 0x24, keysize - (keysize/2));
 		goto got_key;
 	}
 
-- 
cgit v1.2.3


From 5701659004d68085182d2fd4199c79172165fa65 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 27 Dec 2016 18:23:06 -0800
Subject: net: stmmac: Fix race between stmmac_drv_probe and stmmac_open

There is currently a small window during which the network device registered by
stmmac can be made visible, yet all resources, including and clock and MDIO bus
have not had a chance to be set up, this can lead to the following error to
occur:

[  473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized):
                stmmac_dvr_probe: warning: cannot get CSR clock
[  473.919382] stmmaceth 0000:01:00.0: no reset control found
[  473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42
[  473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported
[  473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported
[  473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported
[  473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized):
                Enable RX Mitigation via HW Watchdog Timer
[  473.921395] libphy: PHY stmmac-1:00 not found
[  473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY
[  473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to
                PHY (error: -19)
[  473.959710] libphy: stmmac: probed
[  473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL
                (stmmac-1:00) active
[  473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 1 IRQ POLL
                (stmmac-1:01)
[  473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL
                (stmmac-1:02)
[  473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL
                (stmmac-1:03)

Fix this by making sure that register_netdev() is the last thing being done,
which guarantees that the clock and the MDIO bus are available.

Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove")
Reported-by: Kweh, Hock Leong <hock.leong.kweh@intel.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index bb40382e205d..5910ea51f8f6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3339,13 +3339,6 @@ int stmmac_dvr_probe(struct device *device,
 
 	spin_lock_init(&priv->lock);
 
-	ret = register_netdev(ndev);
-	if (ret) {
-		netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
-			   __func__, ret);
-		goto error_netdev_register;
-	}
-
 	/* If a specific clk_csr value is passed from the platform
 	 * this means that the CSR Clock Range selection cannot be
 	 * changed at run-time and it is fixed. Viceversa the driver'll try to
@@ -3372,11 +3365,14 @@ int stmmac_dvr_probe(struct device *device,
 		}
 	}
 
-	return 0;
+	ret = register_netdev(ndev);
+	if (ret)
+		netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
+			   __func__, ret);
+
+	return ret;
 
 error_mdio_register:
-	unregister_netdev(ndev);
-error_netdev_register:
 	netif_napi_del(&priv->napi);
 error_hw_init:
 	clk_disable_unprepare(priv->pclk);
-- 
cgit v1.2.3


From 0df0f207aab4f42e5c96a807adf9a6845b69e984 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Wed, 28 Dec 2016 14:54:47 +0200
Subject: net/sched: cls_flower: Fix missing addr_type in classify

Since we now use a non zero mask on addr_type, we are matching on its
value (IPV4/IPV6). So before this fix, matching on enc_src_ip/enc_dst_ip
failed in SW/classify path since its value was zero.
This patch sets the proper value of addr_type for encapsulated packets.

Fixes: 970bfcd09791 ('net/sched: cls_flower: Use mask for addr_type')
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 333f8e268431..970db7a41684 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -153,10 +153,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 		switch (ip_tunnel_info_af(info)) {
 		case AF_INET:
+			skb_key.enc_control.addr_type =
+				FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 			skb_key.enc_ipv4.src = key->u.ipv4.src;
 			skb_key.enc_ipv4.dst = key->u.ipv4.dst;
 			break;
 		case AF_INET6:
+			skb_key.enc_control.addr_type =
+				FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 			skb_key.enc_ipv6.src = key->u.ipv6.src;
 			skb_key.enc_ipv6.dst = key->u.ipv6.dst;
 			break;
-- 
cgit v1.2.3


From 9da34cd34e85aacc55af8774b81b1f23e86014f9 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:31 +0200
Subject: net/mlx5: Disable RoCE on the e-switch management port under
 switchdev mode

Under the switchdev/offloads mode, packets that don't match any
e-switch steering rule are sent towards the e-switch management
port. We use a NIC HW steering rule set per vport (uplink and VFs)
to make them be received into the host OS through the respective
vport representor netdevice.

Currnetly such missed RoCE packets will not get to this NIC steering
rule, and hence VF RoCE will not work over the slow path of the offloads
mode. This is b/c these packets will be matched by a steering rule added
by the firmware that serves RoCE traffic set on the PF NIC vport which
is also the e-switch management port under SRIOV.

Disabling RoCE on the e-switch management vport when we are in the offloads
mode, will signal to the firmware to remove their RoCE rule, and then the
missed RoCE packets will be matched by the representor NIC steering rule
as any other missed packets.

To achieve that, we disable RoCE on the PF vport. We do that by removing
(hot-unplugging) the IB device instance associated with the PF. This is
also required by our current model where the PF serves as the uplink
representor and hence only SW switching (TC, bridge, OVS) applications
and slow path vport mlx5e net-device should be running over that vport.

Fixes: c930a3ad7453 ('net/mlx5e: Add devlink based SRIOV mode changes')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 466e161010f7..03293ed1cc22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -695,6 +695,12 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 		if (err)
 			goto err_reps;
 	}
+
+	/* disable PF RoCE so missed packets don't go through RoCE steering */
+	mlx5_dev_list_lock();
+	mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+	mlx5_dev_list_unlock();
+
 	return 0;
 
 err_reps:
@@ -718,6 +724,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
 {
 	int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
 
+	/* enable back PF RoCE */
+	mlx5_dev_list_lock();
+	mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+	mlx5_dev_list_unlock();
+
 	mlx5_eswitch_disable_sriov(esw);
 	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
 	if (err) {
-- 
cgit v1.2.3


From 883371c453b937f9eb581fb4915210865982736f Mon Sep 17 00:00:00 2001
From: Noa Osherovich <noaos@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:32 +0200
Subject: net/mlx5: Check FW limitations on log_max_qp before setting it

When setting HCA capabilities, set log_max_qp to be the minimum
between the selected profile's value and the HCA limitation.

Fixes: 938fe83c8dcb ('net/mlx5_core: New device capabilities...')
Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 54e5a786f191..23c12f1aaa39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -503,6 +503,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
 		 to_fw_pkey_sz(dev, 128));
 
+	/* Check log_max_qp from HCA caps to set in current profile */
+	if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
+		mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
+			       profile[prof_sel].log_max_qp,
+			       MLX5_CAP_GEN_MAX(dev, log_max_qp));
+		profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+	}
 	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
 		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
 			 prof->log_max_qp);
-- 
cgit v1.2.3


From 689a248df83b6032edc57e86267b4e5cc8d7174e Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:33 +0200
Subject: net/mlx5: Cancel recovery work in remove flow

If there is pending delayed work for health recovery it must be canceled
if the device is being unloaded.

Fixes: 05ac2c0b7438 ("net/mlx5: Fix race between PCI error handlers and health work")
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 23c12f1aaa39..0b49739eadd3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1196,6 +1196,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 {
 	int err = 0;
 
+	mlx5_drain_health_wq(dev);
+
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
 		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
@@ -1358,10 +1360,9 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
 	mlx5_enter_error_state(dev);
 	mlx5_unload_one(dev, priv, false);
-	/* In case of kernel call save the pci state and drain health wq */
+	/* In case of kernel call save the pci state */
 	if (state) {
 		pci_save_state(pdev);
-		mlx5_drain_health_wq(dev);
 		mlx5_pci_disable_device(dev);
 	}
 
-- 
cgit v1.2.3


From d151d73dcc99de87c63bdefebcc4cb69de1cdc40 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:34 +0200
Subject: net/mlx5: Avoid shadowing numa_node

Avoid using a local variable named numa_node to avoid shadowing a public
one.

Fixes: db058a186f98 ('net/mlx5_core: Set irq affinity hints')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0b49739eadd3..6547f22e6b9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -582,7 +582,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	struct mlx5_priv *priv  = &mdev->priv;
 	struct msix_entry *msix = priv->msix_arr;
 	int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
-	int numa_node           = priv->numa_node;
 	int err;
 
 	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
@@ -590,7 +589,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 		return -ENOMEM;
 	}
 
-	cpumask_set_cpu(cpumask_local_spread(i, numa_node),
+	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
 			priv->irq_info[i].mask);
 
 	err = irq_set_affinity_hint(irq, priv->irq_info[i].mask);
-- 
cgit v1.2.3


From 077b1e8069b9b74477b01d28f6b83774dc19a142 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:35 +0200
Subject: net/mlx5: Mask destination mac value in ethtool steering rules

We need to mask the destination mac value with the destination mac
mask when adding steering rule via ethtool.

Fixes: 1174fce8d1410 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 3691451c728c..d088effd7160 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
 	}
 	if (fs->flow_type & FLOW_MAC_EXT &&
 	    !is_zero_ether_addr(fs->m_ext.h_dest)) {
+		mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN);
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
 					     outer_headers_c, dmac_47_16),
 				fs->m_ext.h_dest);
-- 
cgit v1.2.3


From 9b8c514291a83e53c073b473bdca6267f17a02c2 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:36 +0200
Subject: net/mlx5: Release FTE lock in error flow

Release the FTE lock when adding rule to the FTE has failed.

Fixes: 0fd758d6112f ('net/mlx5: Don't unlock fte while still using it')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index a263d8904a4c..0ac7a2fc916c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1263,6 +1263,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 	nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
 	handle = add_rule_fte(fte, fg, dest, dest_num, false);
 	if (IS_ERR(handle)) {
+		unlock_ref_node(&fte->node);
 		kfree(fte);
 		goto unlock_fg;
 	}
-- 
cgit v1.2.3


From ccce1700263d8b5b219359d04180492a726cea16 Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:37 +0200
Subject: net/mlx5: Prevent setting multicast macs for VFs

Need to check that VF mac address entered by the admin user is either
zero or unicast mac.
Multicast mac addresses are prohibited.

Fixes: 77256579c6b4 ('net/mlx5: E-Switch, Introduce Vport administration functions')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index d6807c3cc461..f14d9c9ba773 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1860,7 +1860,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport))
+	if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
 		return -EINVAL;
 
 	mutex_lock(&esw->state_lock);
-- 
cgit v1.2.3


From 465db5dab86d6688fa5132edd1237102f4a20e84 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:38 +0200
Subject: Revert "net/mlx5e: Expose PCIe statistics to ethtool"

This reverts commit 9c7262399ba12825f3ca4b00a76d8d5e77c720f5.
PCIe counters were introduced in a new firmware version, as a result users
with old firmware encountered a syndrome every 200ms due to update stats
work. This feature will be re-introduced later with appropriate capabilities
infrastructure.

Fixes: 9c7262399ba1 ("net/mlx5e: Expose PCIe statistics to ethtool")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 17 ------------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 24 ----------------
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 32 +---------------------
 3 files changed, 1 insertion(+), 72 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 352462af8d51..33a399a8b5d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -171,7 +171,6 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 		return NUM_SW_COUNTERS +
 		       MLX5E_NUM_Q_CNTRS(priv) +
 		       NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS +
-		       NUM_PCIE_COUNTERS +
 		       MLX5E_NUM_RQ_STATS(priv) +
 		       MLX5E_NUM_SQ_STATS(priv) +
 		       MLX5E_NUM_PFC_COUNTERS(priv) +
@@ -219,14 +218,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
 		       pport_2819_stats_desc[i].format);
 
-	for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pcie_perf_stats_desc[i].format);
-
-	for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pcie_tas_stats_desc[i].format);
-
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN,
@@ -339,14 +330,6 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
 		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
 						  pport_2819_stats_desc, i);
 
-	for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
-						  pcie_perf_stats_desc, i);
-
-	for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_tas_counters,
-						  pcie_tas_stats_desc, i);
-
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
 			data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cbfa38fc72c0..be5ef036401d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -291,36 +291,12 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv)
 				      &qcnt->rx_out_of_buffer);
 }
 
-static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
-{
-	struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie;
-	struct mlx5_core_dev *mdev = priv->mdev;
-	int sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
-	void *out;
-	u32 *in;
-
-	in = mlx5_vzalloc(sz);
-	if (!in)
-		return;
-
-	out = pcie_stats->pcie_perf_counters;
-	MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
-	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
-
-	out = pcie_stats->pcie_tas_counters;
-	MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP);
-	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
-
-	kvfree(in);
-}
-
 void mlx5e_update_stats(struct mlx5e_priv *priv)
 {
 	mlx5e_update_q_counter(priv);
 	mlx5e_update_vport_counters(priv);
 	mlx5e_update_pport_counters(priv);
 	mlx5e_update_sw_counters(priv);
-	mlx5e_update_pcie_counters(priv);
 }
 
 void mlx5e_update_stats_work(struct work_struct *work)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index f202f872f57f..ba5db1dd23a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -39,7 +39,7 @@
 #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \
 	(*(u32 *)((char *)ptr + dsc[i].offset))
 #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
-	be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
+	be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
 
 #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
 #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
@@ -276,32 +276,6 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
 	{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
-#define PCIE_PERF_OFF(c) \
-	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
-#define PCIE_PERF_GET(pcie_stats, c) \
-	MLX5_GET(mpcnt_reg, pcie_stats->pcie_perf_counters, \
-		 counter_set.pcie_perf_cntrs_grp_data_layout.c)
-#define PCIE_TAS_OFF(c) \
-	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_tas_cntrs_grp_data_layout.c)
-#define PCIE_TAS_GET(pcie_stats, c) \
-	MLX5_GET(mpcnt_reg, pcie_stats->pcie_tas_counters, \
-		 counter_set.pcie_tas_cntrs_grp_data_layout.c)
-
-struct mlx5e_pcie_stats {
-	__be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
-	__be64 pcie_tas_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
-};
-
-static const struct counter_desc pcie_perf_stats_desc[] = {
-	{ "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
-	{ "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
-};
-
-static const struct counter_desc pcie_tas_stats_desc[] = {
-	{ "tx_pci_transport_nonfatal_msg", PCIE_TAS_OFF(non_fatal_err_msg_sent) },
-	{ "tx_pci_transport_fatal_msg", PCIE_TAS_OFF(fatal_err_msg_sent) },
-};
-
 struct mlx5e_rq_stats {
 	u64 packets;
 	u64 bytes;
@@ -386,8 +360,6 @@ static const struct counter_desc sq_stats_desc[] = {
 #define NUM_PPORT_802_3_COUNTERS	ARRAY_SIZE(pport_802_3_stats_desc)
 #define NUM_PPORT_2863_COUNTERS		ARRAY_SIZE(pport_2863_stats_desc)
 #define NUM_PPORT_2819_COUNTERS		ARRAY_SIZE(pport_2819_stats_desc)
-#define NUM_PCIE_PERF_COUNTERS		ARRAY_SIZE(pcie_perf_stats_desc)
-#define NUM_PCIE_TAS_COUNTERS		ARRAY_SIZE(pcie_tas_stats_desc)
 #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \
 	ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
 #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \
@@ -397,7 +369,6 @@ static const struct counter_desc sq_stats_desc[] = {
 					 NUM_PPORT_2819_COUNTERS  + \
 					 NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \
 					 NUM_PPORT_PRIO)
-#define NUM_PCIE_COUNTERS		(NUM_PCIE_PERF_COUNTERS + NUM_PCIE_TAS_COUNTERS)
 #define NUM_RQ_STATS			ARRAY_SIZE(rq_stats_desc)
 #define NUM_SQ_STATS			ARRAY_SIZE(sq_stats_desc)
 
@@ -406,7 +377,6 @@ struct mlx5e_stats {
 	struct mlx5e_qcounter_stats qcnt;
 	struct mlx5e_vport_stats vport;
 	struct mlx5e_pport_stats pport;
-	struct mlx5e_pcie_stats pcie;
 	struct rtnl_link_stats64 vf_vport;
 };
 
-- 
cgit v1.2.3


From 1efbd205b3cc5882a8c386c58a57134044e9d5ba Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:39 +0200
Subject: Revert "net/mlx5: Add MPCNT register infrastructure"

This reverts commit 7f503169cabd70c1f13b9279c50eca7dfb9a7d51.

Fixes: 7f503169cabd ("net/mlx5: Add MPCNT register infrastructure")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   |  5 ---
 include/linux/mlx5/driver.h   |  1 -
 include/linux/mlx5/mlx5_ifc.h | 93 -------------------------------------------
 3 files changed, 99 deletions(-)

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 9f489365b3d3..52b437431c6a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1071,11 +1071,6 @@ enum {
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
 
-enum {
-	MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP       = 0x0,
-	MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2,
-};
-
 static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 {
 	if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0ae55361e674..735b36335f29 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -123,7 +123,6 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
-	MLX5_REG_MPCNT		 = 0x9051,
 };
 
 enum mlx5_dcbx_oper_mode {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 57bec544e20a..a852e9db6f0d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1757,80 +1757,6 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
 	u8         reserved_at_4c0[0x300];
 };
 
-struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits {
-	u8         life_time_counter_high[0x20];
-
-	u8         life_time_counter_low[0x20];
-
-	u8         rx_errors[0x20];
-
-	u8         tx_errors[0x20];
-
-	u8         l0_to_recovery_eieos[0x20];
-
-	u8         l0_to_recovery_ts[0x20];
-
-	u8         l0_to_recovery_framing[0x20];
-
-	u8         l0_to_recovery_retrain[0x20];
-
-	u8         crc_error_dllp[0x20];
-
-	u8         crc_error_tlp[0x20];
-
-	u8         reserved_at_140[0x680];
-};
-
-struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits {
-	u8         life_time_counter_high[0x20];
-
-	u8         life_time_counter_low[0x20];
-
-	u8         time_to_boot_image_start[0x20];
-
-	u8         time_to_link_image[0x20];
-
-	u8         calibration_time[0x20];
-
-	u8         time_to_first_perst[0x20];
-
-	u8         time_to_detect_state[0x20];
-
-	u8         time_to_l0[0x20];
-
-	u8         time_to_crs_en[0x20];
-
-	u8         time_to_plastic_image_start[0x20];
-
-	u8         time_to_iron_image_start[0x20];
-
-	u8         perst_handler[0x20];
-
-	u8         times_in_l1[0x20];
-
-	u8         times_in_l23[0x20];
-
-	u8         dl_down[0x20];
-
-	u8         config_cycle1usec[0x20];
-
-	u8         config_cycle2to7usec[0x20];
-
-	u8         config_cycle_8to15usec[0x20];
-
-	u8         config_cycle_16_to_63usec[0x20];
-
-	u8         config_cycle_64usec[0x20];
-
-	u8         correctable_err_msg_sent[0x20];
-
-	u8         non_fatal_err_msg_sent[0x20];
-
-	u8         fatal_err_msg_sent[0x20];
-
-	u8         reserved_at_2e0[0x4e0];
-};
-
 struct mlx5_ifc_cmd_inter_comp_event_bits {
 	u8         command_completion_vector[0x20];
 
@@ -2995,12 +2921,6 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	u8         reserved_at_0[0x7c0];
 };
 
-union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits {
-	struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout;
-	struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout;
-	u8         reserved_at_0[0x7c0];
-};
-
 union mlx5_ifc_event_auto_bits {
 	struct mlx5_ifc_comp_event_bits comp_event;
 	struct mlx5_ifc_dct_events_bits dct_events;
@@ -7320,18 +7240,6 @@ struct mlx5_ifc_ppcnt_reg_bits {
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
-struct mlx5_ifc_mpcnt_reg_bits {
-	u8         reserved_at_0[0x8];
-	u8         pcie_index[0x8];
-	u8         reserved_at_10[0xa];
-	u8         grp[0x6];
-
-	u8         clr[0x1];
-	u8         reserved_at_21[0x1f];
-
-	union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set;
-};
-
 struct mlx5_ifc_ppad_reg_bits {
 	u8         reserved_at_0[0x3];
 	u8         single_mac[0x1];
@@ -7937,7 +7845,6 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
 	struct mlx5_ifc_ppad_reg_bits ppad_reg;
 	struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
-	struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
 	struct mlx5_ifc_pplm_reg_bits pplm_reg;
 	struct mlx5_ifc_pplr_reg_bits pplr_reg;
 	struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
-- 
cgit v1.2.3


From 4525a45bfad55a00ef218c5fbe5d98a3d8170bf5 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:40 +0200
Subject: net/mlx5e: Check ets capability before initializing ets settings

During the initial setup, the ets command is sent to firmware
without checking if the HCA supports ets. This causes the invalid
command error. Add the ets capiblity check before sending firmware
command to initialize ets settings.

Fixes: e207b7e99176 ("net/mlx5e: ConnectX-4 firmware support for DCBX")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 7f6c225666c1..f0b460f47f29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -723,6 +723,9 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv)
 	int i;
 	struct ieee_ets ets;
 
+	if (!MLX5_CAP_GEN(priv->mdev, ets))
+		return;
+
 	memset(&ets, 0, sizeof(ets));
 	ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
 	for (i = 0; i < ets.ets_cap; i++) {
-- 
cgit v1.2.3


From 610e89e05c3f28a7394935aa6b91f99548c4fd3c Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:41 +0200
Subject: net/mlx5e: Don't sync netdev state when not registered

Skip setting netdev vxlan ports and netdev rx_mode on driver load
when netdev is not yet registered.

Synchronizing with netdev state is needed only on reset flow where the
netdev remains registered for the whole reset period.

This also fixes an access before initialization of net_device.addr_list_lock
- which for some reason initialized on register_netdev - where we queued
set_rx_mode work on driver load before netdev registration.

Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index be5ef036401d..cf270f6c90e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3781,14 +3781,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 
 	mlx5_lag_add(mdev, netdev);
 
-	if (mlx5e_vxlan_allowed(mdev)) {
-		rtnl_lock();
-		udp_tunnel_get_rx_info(netdev);
-		rtnl_unlock();
-	}
-
 	mlx5e_enable_async_events(priv);
-	queue_work(priv->wq, &priv->set_rx_mode_work);
 
 	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
 		mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
@@ -3798,6 +3791,18 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 		rep.netdev = netdev;
 		mlx5_eswitch_register_vport_rep(esw, 0, &rep);
 	}
+
+	if (netdev->reg_state != NETREG_REGISTERED)
+		return;
+
+	/* Device already registered: sync netdev system state */
+	if (mlx5e_vxlan_allowed(mdev)) {
+		rtnl_lock();
+		udp_tunnel_get_rx_info(netdev);
+		rtnl_unlock();
+	}
+
+	queue_work(priv->wq, &priv->set_rx_mode_work);
 }
 
 static void mlx5e_nic_disable(struct mlx5e_priv *priv)
-- 
cgit v1.2.3


From 37f304d10030bb425c19099e7b955d9c3ec4cba3 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:42 +0200
Subject: net/mlx5e: Disable netdev after close

Disable netdev should come after it was closed, although no harm of doing it
before -hence the MLX5E_STATE_DESTROYING bit- but it is more natural this way.

Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cf270f6c90e8..1236b27b1493 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3947,10 +3947,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 	const struct mlx5e_profile *profile = priv->profile;
 
 	set_bit(MLX5E_STATE_DESTROYING, &priv->state);
-	if (profile->disable)
-		profile->disable(priv);
-
-	flush_workqueue(priv->wq);
 
 	rtnl_lock();
 	if (netif_running(netdev))
@@ -3958,6 +3954,10 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 	netif_device_detach(netdev);
 	rtnl_unlock();
 
+	if (profile->disable)
+		profile->disable(priv);
+	flush_workqueue(priv->wq);
+
 	mlx5e_destroy_q_counter(priv);
 	profile->cleanup_rx(priv);
 	mlx5e_close_drop_rq(priv);
-- 
cgit v1.2.3


From 9dd0f896d2cc5815d859e945db90915071cd44b3 Mon Sep 17 00:00:00 2001
From: Augusto Mecking Caringi <augustocaringi@gmail.com>
Date: Wed, 28 Dec 2016 16:02:05 +0000
Subject: net: atm: Fix warnings in net/atm/lec.c when !CONFIG_PROC_FS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes the following warnings when CONFIG_PROC_FS is not set:

linux/net/atm/lec.c: In function ‘lane_module_cleanup’:
linux/net/atm/lec.c:1062:27: error: ‘atm_proc_root’ undeclared (first
use in this function)
remove_proc_entry("lec", atm_proc_root);
                           ^
linux/net/atm/lec.c:1062:27: note: each undeclared identifier is
reported only once for each function it appears in

Signed-off-by: Augusto Mecking Caringi <augustocaringi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/lec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/atm/lec.c b/net/atm/lec.c
index 019557d0a11d..09cfe87f0a44 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1059,7 +1059,9 @@ static void __exit lane_module_cleanup(void)
 {
 	int i;
 
+#ifdef CONFIG_PROC_FS
 	remove_proc_entry("lec", atm_proc_root);
+#endif
 
 	deregister_atm_ioctl(&lane_ioctl_ops);
 
-- 
cgit v1.2.3


From 60133867f1f111aaf3a8c00375b8026142a9a591 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 28 Dec 2016 16:44:23 +0000
Subject: net: wan: slic_ds26522: fix spelling mistake: "configurated" ->
 "configured"

trivial fix to spelling mistake in pr_info message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/slic_ds26522.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c
index b776a0ab106c..9d9b4e0def2a 100644
--- a/drivers/net/wan/slic_ds26522.c
+++ b/drivers/net/wan/slic_ds26522.c
@@ -218,7 +218,7 @@ static int slic_ds26522_probe(struct spi_device *spi)
 
 	ret = slic_ds26522_init_configure(spi);
 	if (ret == 0)
-		pr_info("DS26522 cs%d configurated\n", spi->chip_select);
+		pr_info("DS26522 cs%d configured\n", spi->chip_select);
 
 	return ret;
 }
-- 
cgit v1.2.3


From e4c5e13aa45c23692e4acf56f0b3533f328199b2 Mon Sep 17 00:00:00 2001
From: Zheng Li <james.z.li@ericsson.com>
Date: Wed, 28 Dec 2016 23:23:46 +0800
Subject: ipv6: Should use consistent conditional judgement for ip6 fragment
 between __ip6_append_data and ip6_finish_output

There is an inconsistent conditional judgement between __ip6_append_data
and ip6_finish_output functions, the variable length in __ip6_append_data
just include the length of application's payload and udp6 header, don't
include the length of ipv6 header, but in ip6_finish_output use
(skb->len > ip6_skb_dst_mtu(skb)) as judgement, and skb->len include the
length of ipv6 header.

That causes some particular application's udp6 payloads whose length are
between (MTU - IPv6 Header) and MTU were fragmented by ip6_fragment even
though the rst->dev support UFO feature.

Add the length of ipv6 header to length in __ip6_append_data to keep
consistent conditional judgement as ip6_finish_output for ip6 fragment.

Signed-off-by: Zheng Li <james.z.li@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 70d0de404197..38122d04fadc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1373,7 +1373,7 @@ emsgsize:
 	 */
 
 	cork->length += length;
-	if (((length > mtu) ||
+	if ((((length + fragheaderlen) > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
-- 
cgit v1.2.3


From b2eb09af7370fedc6b9d9f05762f01625438467a Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 28 Dec 2016 15:44:41 -0800
Subject: net: stmmac: Fix error path after register_netdev move

Commit 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and
stmmac_open") re-ordered how the MDIO bus registration and the network
device are registered, but missed to unwind the MDIO bus registration in
case we fail to register the network device.

Fixes: 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and stmmac_open")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Kweh, Hock Leong <hock.leong.kweh@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5910ea51f8f6..39eb7a65bb9f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3366,12 +3366,19 @@ int stmmac_dvr_probe(struct device *device,
 	}
 
 	ret = register_netdev(ndev);
-	if (ret)
+	if (ret) {
 		netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
 			   __func__, ret);
+		goto error_netdev_register;
+	}
 
 	return ret;
 
+error_netdev_register:
+	if (priv->hw->pcs != STMMAC_PCS_RGMII &&
+	    priv->hw->pcs != STMMAC_PCS_TBI &&
+	    priv->hw->pcs != STMMAC_PCS_RTBI)
+		stmmac_mdio_unregister(ndev);
 error_mdio_register:
 	netif_napi_del(&priv->napi);
 error_hw_init:
-- 
cgit v1.2.3


From b91e1302ad9b80c174a4855533f7e3aa2873355e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 27 Dec 2016 11:40:38 -0800
Subject: mm: optimize PageWaiters bit use for unlock_page()

In commit 62906027091f ("mm: add PageWaiters indicating tasks are
waiting for a page bit") Nick Piggin made our page locking no longer
unconditionally touch the hashed page waitqueue, which not only helps
performance in general, but is particularly helpful on NUMA machines
where the hashed wait queues can bounce around a lot.

However, the "clear lock bit atomically and then test the waiters bit"
sequence turns out to be much more expensive than it needs to be,
because you get a nasty stall when trying to access the same word that
just got updated atomically.

On architectures where locking is done with LL/SC, this would be trivial
to fix with a new primitive that clears one bit and tests another
atomically, but that ends up not working on x86, where the only atomic
operations that return the result end up being cmpxchg and xadd.  The
atomic bit operations return the old value of the same bit we changed,
not the value of an unrelated bit.

On x86, we could put the lock bit in the high bit of the byte, and use
"xadd" with that bit (where the overflow ends up not touching other
bits), and look at the other bits of the result.  However, an even
simpler model is to just use a regular atomic "and" to clear the lock
bit, and then the sign bit in eflags will indicate the resulting state
of the unrelated bit #7.

So by moving the PageWaiters bit up to bit #7, we can atomically clear
the lock bit and test the waiters bit on x86 too.  And architectures
with LL/SC (which is all the usual RISC suspects), the particular bit
doesn't matter, so they are fine with this approach too.

This avoids the extra access to the same atomic word, and thus avoids
the costly stall at page unlock time.

The only downside is that the interface ends up being a bit odd and
specialized: clear a bit in a byte, and test the sign bit.  Nick doesn't
love the resulting name of the new primitive, but I'd rather make the
name be descriptive and very clear about the limitation imposed by
trying to work across all relevant architectures than make it be some
generic thing that doesn't make the odd semantics explicit.

So this introduces the new architecture primitive

    clear_bit_unlock_is_negative_byte();

and adds the trivial implementation for x86.  We have a generic
non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)"
combination) which can be overridden by any architecture that can do
better.  According to Nick, Power has the same hickup x86 has, for
example, but some other architectures may not even care.

All these optimizations mean that my page locking stress-test (which is
just executing a lot of small short-lived shell scripts: "make test" in
the git source tree) no longer makes our page locking look horribly bad.
Before all these optimizations, just the unlock_page() costs were just
over 3% of all CPU overhead on "make test".  After this, it's down to
0.66%, so just a quarter of the cost it used to be.

(The difference on NUMA is bigger, but there this micro-optimization is
likely less noticeable, since the big issue on NUMA was not the accesses
to 'struct page', but the waitqueue accesses that were already removed
by Nick's earlier commit).

Acked-by: Nick Piggin <npiggin@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Bob Peterson <rpeterso@redhat.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/bitops.h | 13 +++++++++++++
 include/linux/page-flags.h    |  2 +-
 mm/filemap.c                  | 36 +++++++++++++++++++++++++++++++-----
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 68557f52b961..854022772c5b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
 
+static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+{
+	bool negative;
+	asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+		CC_SET(s)
+		: CC_OUT(s) (negative), ADDR
+		: "ir" ((char) ~(1 << nr)) : "memory");
+	return negative;
+}
+
+// Let everybody know we have it
+#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+
 /*
  * __clear_bit_unlock - Clears a bit in memory
  * @nr: Bit to clear
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c56b39890a41..6b5818d6de32 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -73,13 +73,13 @@
  */
 enum pageflags {
 	PG_locked,		/* Page is locked. Don't touch. */
-	PG_waiters,		/* Page has waiters, check its waitqueue */
 	PG_error,
 	PG_referenced,
 	PG_uptodate,
 	PG_dirty,
 	PG_lru,
 	PG_active,
+	PG_waiters,		/* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
 	PG_slab,
 	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
 	PG_arch_1,
diff --git a/mm/filemap.c b/mm/filemap.c
index 82f26cde830c..6b1d96f86a9c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
 }
 EXPORT_SYMBOL_GPL(add_page_wait_queue);
 
+#ifndef clear_bit_unlock_is_negative_byte
+
+/*
+ * PG_waiters is the high bit in the same byte as PG_lock.
+ *
+ * On x86 (and on many other architectures), we can clear PG_lock and
+ * test the sign bit at the same time. But if the architecture does
+ * not support that special operation, we just do this all by hand
+ * instead.
+ *
+ * The read of PG_waiters has to be after (or concurrently with) PG_locked
+ * being cleared, but a memory barrier should be unneccssary since it is
+ * in the same byte as PG_locked.
+ */
+static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
+{
+	clear_bit_unlock(nr, mem);
+	/* smp_mb__after_atomic(); */
+	return test_bit(PG_waiters);
+}
+
+#endif
+
 /**
  * unlock_page - unlock a locked page
  * @page: the page
@@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue);
  * mechanism between PageLocked pages and PageWriteback pages is shared.
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
- * The mb is necessary to enforce ordering between the clear_bit and the read
- * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
+ * Note that this depends on PG_waiters being the sign bit in the byte
+ * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
+ * clear the PG_locked bit and test PG_waiters at the same time fairly
+ * portably (architectures that do LL/SC can test any bit, while x86 can
+ * test the sign bit).
  */
 void unlock_page(struct page *page)
 {
+	BUILD_BUG_ON(PG_waiters != 7);
 	page = compound_head(page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	clear_bit_unlock(PG_locked, &page->flags);
-	smp_mb__after_atomic();
-	wake_up_page(page, PG_locked);
+	if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
+		wake_up_page_bit(page, PG_locked);
 }
 EXPORT_SYMBOL(unlock_page);
 
-- 
cgit v1.2.3


From 4775cc1f2d5abca894ac32774eefc22c45347d1c Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 28 Dec 2016 17:52:15 +0100
Subject: rtnl: stats - add missing netlink message size checks

We miss to check if the netlink message is actually big enough to contain
a struct if_stats_msg.

Add a check to prevent userland from sending us short messages that would
make us access memory beyond the end of the message.

Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump...")
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 18b5aae99bec..75e3ea7bda08 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3898,6 +3898,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
 	u32 filter_mask;
 	int err;
 
+	if (nlmsg_len(nlh) < sizeof(*ifsm))
+		return -EINVAL;
+
 	ifsm = nlmsg_data(nlh);
 	if (ifsm->ifindex > 0)
 		dev = __dev_get_by_index(net, ifsm->ifindex);
@@ -3947,6 +3950,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	cb->seq = net->dev_base_seq;
 
+	if (nlmsg_len(cb->nlh) < sizeof(*ifsm))
+		return -EINVAL;
+
 	ifsm = nlmsg_data(cb->nlh);
 	filter_mask = ifsm->filter_mask;
 	if (!filter_mask)
-- 
cgit v1.2.3


From f0c16ba8933ed217c2688b277410b2a37ba81591 Mon Sep 17 00:00:00 2001
From: Wei Zhang <asuka.com@163.com>
Date: Thu, 29 Dec 2016 16:45:04 +0800
Subject: net: fix incorrect original ingress device index in PKTINFO

When we send a packet for our own local address on a non-loopback
interface (e.g. eth0), due to the change had been introduced from
commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the
original ingress device index would be set as the loopback interface.
However, the packet should be considered as if it is being arrived via the
sending interface (eth0), otherwise it would break the expectation of the
userspace application (e.g. the DHCPRELEASE message from dhcp_release
binary would be ignored by the dnsmasq daemon, since it come from lo which
is not the interface dnsmasq bind to)

Fixes: 0b922b7a829c ("net: original ingress device index in PKTINFO")
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Wei Zhang <asuka.com@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 57e1405e8282..53ae0c6315ad 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1225,8 +1225,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 		 * which has interface index (iif) as the first member of the
 		 * underlying inet{6}_skb_parm struct. This code then overlays
 		 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
-		 * element so the iif is picked up from the prior IPCB
+		 * element so the iif is picked up from the prior IPCB. If iif
+		 * is the loopback interface, then return the sending interface
+		 * (e.g., process binds socket to eth0 for Tx which is
+		 * redirected to loopback in the rtable/dst).
 		 */
+		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
+			pktinfo->ipi_ifindex = inet_iif(skb);
+
 		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
 	} else {
 		pktinfo->ipi_ifindex = 0;
-- 
cgit v1.2.3


From 3b01fe7f91c8e4f9afc4fae3c5af72c14958d2d8 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 Dec 2016 18:37:09 +0200
Subject: net/mlx4_core: Use-after-free causes a resource leak in flow-steering
 detach

mlx4_QP_FLOW_STEERING_DETACH_wrapper first removes the steering
rule (which results in freeing the rule structure), and then
references a field in this struct (the qp number) when releasing the
busy-status on the rule's qp.

Since this memory was freed, it could reallocated and changed.
Therefore, the qp number in the struct may be incorrect,
so that we are releasing the incorrect qp. This leaves the rule's qp
in the busy state (and could possibly release an incorrect qp as well).

Fix this by saving the qp number in a local variable, for use after
removing the steering rule.

Fixes: 2c473ae7e582 ("net/mlx4_core: Disallow releasing VF QPs which have steering rules")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index c548beaaf910..4b3e139e9c82 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4473,6 +4473,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
 	struct res_qp *rqp;
 	struct res_fs_rule *rrule;
 	u64 mirr_reg_id;
+	int qpn;
 
 	if (dev->caps.steering_mode !=
 	    MLX4_STEERING_MODE_DEVICE_MANAGED)
@@ -4489,10 +4490,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
 	}
 	mirr_reg_id = rrule->mirr_rule_id;
 	kfree(rrule->mirr_mbox);
+	qpn = rrule->qpn;
 
 	/* Release the rule form busy state before removal */
 	put_res(dev, slave, vhcr->in_param, RES_FS_RULE);
-	err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp);
+	err = get_res(dev, slave, qpn, RES_QP, &rqp);
 	if (err)
 		return err;
 
@@ -4517,7 +4519,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
 	if (!err)
 		atomic_dec(&rqp->ref_count);
 out:
-	put_res(dev, slave, rrule->qpn, RES_QP);
+	put_res(dev, slave, qpn, RES_QP);
 	return err;
 }
 
-- 
cgit v1.2.3


From 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 29 Dec 2016 18:37:10 +0200
Subject: net/mlx4_en: Fix bad WQE issue

Single send WQE in RX buffer should be stamped with software
ownership in order to prevent the flow of QP in error in FW
once UPDATE_QP is called.

Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 3c37e216bbf3..eac527e25ec9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -445,8 +445,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
 		ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
 
 		ring->stride = stride;
-		if (ring->stride <= TXBB_SIZE)
+		if (ring->stride <= TXBB_SIZE) {
+			/* Stamp first unused send wqe */
+			__be32 *ptr = (__be32 *)ring->buf;
+			__be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
+			*ptr = stamp;
+			/* Move pointer to start of rx section */
 			ring->buf += TXBB_SIZE;
+		}
 
 		ring->log_stride = ffs(ring->stride) - 1;
 		ring->buf_size = ring->size * ring->stride;
-- 
cgit v1.2.3


From c1d5f8ff80ea84768f5fae1ca9d1abfbb5e6bbaa Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 29 Dec 2016 18:37:11 +0200
Subject: net/mlx4: Remove BUG_ON from ICM allocation routine

This patch removes BUG_ON() macro from mlx4_alloc_icm_coherent()
by checking DMA address alignment in advance and performing proper
folding in case of error.

Fixes: 5b0bf5e25efe ("mlx4_core: Support ICM tables in coherent memory")
Reported-by: Ozgur Karatas <okaratas@member.fsf.org>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 2a9dd460a95f..e1f9e7cebf8f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
 	if (!buf)
 		return -ENOMEM;
 
+	if (offset_in_page(buf)) {
+		dma_free_coherent(dev, PAGE_SIZE << order,
+				  buf, sg_dma_address(mem));
+		return -ENOMEM;
+	}
+
 	sg_set_buf(mem, buf, PAGE_SIZE << order);
-	BUG_ON(mem->offset);
 	sg_dma_len(mem) = PAGE_SIZE << order;
 	return 0;
 }
-- 
cgit v1.2.3


From 61b6034c6cfdcb265bb453505c3d688e7567727a Mon Sep 17 00:00:00 2001
From: Slava Shwartsman <slavash@mellanox.com>
Date: Thu, 29 Dec 2016 18:37:12 +0200
Subject: net/mlx4_en: Fix type mismatch for 32-bit systems

is_power_of_2 expects unsigned long and we pass u64 max_val_cycles,
this will be truncated on 32 bit systems, and the result is not what we
were expecting.
div_u64 expects u32 as a second argument and we pass
max_val_cycles_rounded which is u64 hence it will always be truncated.
Fix was tested on both 64 and 32 bit systems and got same results for
max_val_cycles and max_val_cycles_rounded.

Fixes: 4850cf458157 ("net/mlx4_en: Resolve dividing by zero in 32-bit system")
Signed-off-by: Slava Shwartsman <slavash@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_clock.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 015198c14fa8..504461a464c5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -245,13 +245,9 @@ static u32 freq_to_shift(u16 freq)
 {
 	u32 freq_khz = freq * 1000;
 	u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
-	u64 tmp_rounded =
-		roundup_pow_of_two(max_val_cycles) > max_val_cycles ?
-		roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX;
-	u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
-		max_val_cycles : tmp_rounded;
+	u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1);
 	/* calculate max possible multiplier in order to fit in 64bit */
-	u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
+	u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded);
 
 	/* This comes from the reverse of clocksource_khz2mult */
 	return ilog2(div_u64(max_mul * freq_khz, 1000000));
-- 
cgit v1.2.3


From 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 Dec 2016 18:37:13 +0200
Subject: net/mlx4_core: Fix raw qp flow steering rules under SRIOV

Demoting simple flow steering rule priority (for DPDK) was achieved by
wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF
as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper
function for the PF and all VFs.

In function mlx4_ib_create_flow(), this change caused the main rule
creation for the PF to be wrapped, while it left the associated
tunnel steering rule creation unwrapped for the PF.

This mismatch caused rule deletion failures in mlx4_ib_destroy_flow()
for the PF when the detach wrapper function did not find the associated
tunnel-steering rule (since creation of that rule for the PF did not
go through the wrapper function).

Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native"
(so that the PF invocation does not go through the wrapper), and perform
the required priority demotion for the PF in the mlx4_ib_create_flow()
code path.

Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c                  | 14 ++++++++++++--
 drivers/net/ethernet/mellanox/mlx4/main.c          | 18 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 22 +---------------------
 include/linux/mlx4/device.h                        |  2 ++
 4 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c8413fc120e6..7031a8dd4d14 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1682,9 +1682,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
 		size += ret;
 	}
 
+	if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
+	    flow_attr->num_of_specs == 1) {
+		struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
+		enum ib_flow_spec_type header_spec =
+			((union ib_flow_spec *)(flow_attr + 1))->type;
+
+		if (header_spec == IB_FLOW_SPEC_ETH)
+			mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
+	}
+
 	ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
 			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
-			   MLX4_CMD_WRAPPED);
+			   MLX4_CMD_NATIVE);
 	if (ret == -ENOMEM)
 		pr_err("mcg table is full. Fail to register network rule.\n");
 	else if (ret == -ENXIO)
@@ -1701,7 +1711,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
 	int err;
 	err = mlx4_cmd(dev, reg_id, 0, 0,
 		       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
-		       MLX4_CMD_WRAPPED);
+		       MLX4_CMD_NATIVE);
 	if (err)
 		pr_err("Fail to detach network rule. registration id = 0x%llx\n",
 		       reg_id);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 5e7840a7a33b..bffa6f345f2f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -42,6 +42,7 @@
 #include <linux/io-mapping.h>
 #include <linux/delay.h>
 #include <linux/kmod.h>
+#include <linux/etherdevice.h>
 #include <net/devlink.h>
 
 #include <linux/mlx4/device.h>
@@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
 }
 EXPORT_SYMBOL(mlx4_is_slave_active);
 
+void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
+				       struct _rule_hw *eth_header)
+{
+	if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
+	    is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
+		struct mlx4_net_trans_rule_hw_eth *eth =
+			(struct mlx4_net_trans_rule_hw_eth *)eth_header;
+		struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
+		bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
+			next_rule->rsvd == 0;
+
+		if (last_rule)
+			ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
+	}
+}
+EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);
+
 static void slave_adjust_steering_mode(struct mlx4_dev *dev,
 				       struct mlx4_dev_cap *dev_cap,
 				       struct mlx4_init_hca_param *hca_param)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 4b3e139e9c82..56185a0b827d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4164,22 +4164,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
 	return 0;
 }
 
-static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
-					 struct _rule_hw *eth_header)
-{
-	if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
-	    is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
-		struct mlx4_net_trans_rule_hw_eth *eth =
-			(struct mlx4_net_trans_rule_hw_eth *)eth_header;
-		struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
-		bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
-			next_rule->rsvd == 0;
-
-		if (last_rule)
-			ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
-	}
-}
-
 /*
  * In case of missing eth header, append eth header with a MAC address
  * assigned to the VF.
@@ -4363,10 +4347,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 	header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id));
 
 	if (header_id == MLX4_NET_TRANS_RULE_ID_ETH)
-		handle_eth_header_mcast_prio(ctrl, rule_header);
-
-	if (slave == dev->caps.function)
-		goto execute;
+		mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
 
 	switch (header_id) {
 	case MLX4_NET_TRANS_RULE_ID_ETH:
@@ -4394,7 +4375,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 		goto err_put_qp;
 	}
 
-execute:
 	err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param,
 			   vhcr->in_modifier, 0,
 			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 93bdb3485192..6533c16e27ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val);
 int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
 int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
 				      bool *vlan_offload_disabled);
+void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
+				       struct _rule_hw *eth_header);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3


From 41744213602a206f24adcb4a2b7551db3c700e72 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 26 Dec 2016 12:46:01 +0100
Subject: parisc: Mark cr16 clocksource unstable on SMP systems

The cr16 interval timer of each CPU is not syncronized to other cr16
timers in other CPUs in a SMP system. So, delay the registration of the
cr16 clocksource until all CPUs have been detected and then - if we are
on a SMP machine - mark the cr16 clocksource as unstable and lower it's
rating before registering it at the clocksource framework.

This patch fixes the stalled CPU warnings which we have seen since
introduction of the cr16 clocksource.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # v4.8+
---
 arch/parisc/kernel/time.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index da0d9cb63403..1e22f981cd81 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -235,9 +235,26 @@ void __init time_init(void)
 
 	cr16_hz = 100 * PAGE0->mem_10msec;  /* Hz */
 
-	/* register at clocksource framework */
-	clocksource_register_hz(&clocksource_cr16, cr16_hz);
-
 	/* register as sched_clock source */
 	sched_clock_register(read_cr16_sched_clock, BITS_PER_LONG, cr16_hz);
 }
+
+static int __init init_cr16_clocksource(void)
+{
+	/*
+	 * The cr16 interval timers are not syncronized across CPUs, so mark
+	 * them unstable and lower rating on SMP systems.
+	 */
+	if (num_online_cpus() > 1) {
+		clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
+		clocksource_cr16.rating = 0;
+	}
+
+	/* register at clocksource framework */
+	clocksource_register_hz(&clocksource_cr16,
+		100 * PAGE0->mem_10msec);
+
+	return 0;
+}
+
+device_initcall(init_cr16_clocksource);
-- 
cgit v1.2.3


From 1fe0a7e0bc52024a445945c9e7691551aba97390 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 27 Dec 2016 18:03:50 +0100
Subject: parisc: Drop TIF_RESTORE_SIGMASK and switch to generic code

Commit 7e7814180b33 ("signal: consolidate {TS,TLF}_RESTORE_SIGMASK code")
introduced code with which the "restore sigmask" flag lives in task_struct
instead of ti->flags. Let's use this optimization on parisc too.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/thread_info.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index 7581330ea35b..88fe0aad4390 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -49,7 +49,6 @@ struct thread_info {
 #define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_32BIT               4       /* 32 bit binary */
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK	6	/* restore saved signal mask */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_NOTIFY_RESUME	8	/* callback before returning to user */
 #define TIF_SINGLESTEP		9	/* single stepping? */
-- 
cgit v1.2.3


From 98473f9f3f9bd404873cd1178c8be7d6d619f0d1 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Thu, 29 Dec 2016 14:16:07 -0800
Subject: mm/filemap: fix parameters to test_bit()

 mm/filemap.c: In function 'clear_bit_unlock_is_negative_byte':
  mm/filemap.c:933:9: error: too few arguments to function 'test_bit'
    return test_bit(PG_waiters);
         ^~~~~~~~

Fixes: b91e1302ad9b ('mm: optimize PageWaiters bit use for unlock_page()')
Signed-off-by: Olof Johansson <olof@lixom.net>
Brown-paper-bag-by: Linus Torvalds <dummy@duh.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 6b1d96f86a9c..d0e4d1002059 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -930,7 +930,7 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
 {
 	clear_bit_unlock(nr, mem);
 	/* smp_mb__after_atomic(); */
-	return test_bit(PG_waiters);
+	return test_bit(PG_waiters, mem);
 }
 
 #endif
-- 
cgit v1.2.3


From 2344ef3c86a7fe41f97bf66c7936001b6132860b Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Fri, 30 Dec 2016 00:07:38 +0300
Subject: sh_eth: fix branch prediction in sh_eth_interrupt()

IIUC, likely()/unlikely() should apply to the whole *if* statement's
expression, not a part of it  -- fix such expression in  sh_eth_interrupt()
accordingly...

Fixes: 283e38db65e7 ("sh_eth: Fix serialisation of interrupt disable with interrupt & NAPI handlers")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f341c1bc7001..0af7fc279c85 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1656,7 +1656,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev)
 	else
 		goto out;
 
-	if (!likely(mdp->irq_enabled)) {
+	if (unlikely(!mdp->irq_enabled)) {
 		sh_eth_write(ndev, 0, EESIPR);
 		goto out;
 	}
-- 
cgit v1.2.3


From f5a0aab84b74de68523599817569c057c7ac1622 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 29 Dec 2016 15:29:03 -0800
Subject: net: ipv4: dst for local input routes should use l3mdev if relevant

IPv4 output routes already use l3mdev device instead of loopback for dst's
if it is applicable. Change local input routes to do the same.

This fixes icmp responses for unreachable UDP ports which are directed
to the wrong table after commit 9d1a6c4ea43e4 because local_input
routes use the loopback device. Moving from ingress device to loopback
loses the L3 domain causing responses based on the dst to get to lost.

Fixes: 9d1a6c4ea43e4 ("net: icmp_route_lookup should use rt dev to
		       determine L3 domain")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a82a11747b3f..0fcac8e7a2b2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1914,7 +1914,8 @@ local_input:
 		}
 	}
 
-	rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
+	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
+			   flags | RTCF_LOCAL, res.type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
 	if (!rth)
 		goto e_nobufs;
-- 
cgit v1.2.3


From 42d97eb0ade31e1bc537d086842f5d6e766d9d51 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 Dec 2016 14:20:13 -0800
Subject: fscrypt: fix renaming and linking special files

Attempting to link a device node, named pipe, or socket file into an
encrypted directory through rename(2) or link(2) always failed with
EPERM.  This happened because fscrypt_has_permitted_context() saw that
the file was unencrypted and forbid creating the link.  This behavior
was unexpected because such files are never encrypted; only regular
files, directories, and symlinks can be encrypted.

To fix this, make fscrypt_has_permitted_context() always return true on
special files.

This will be covered by a test in my encryption xfstests patchset.

Fixes: 9bd8212f981e ("ext4 crypto: add encryption policy and password salt support")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Cc: stable@vger.kernel.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/crypto/policy.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 6ed7c2eebeec..d6cd7ea4851d 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -179,6 +179,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
 		BUG_ON(1);
 	}
 
+	/* No restrictions on file types which are never encrypted */
+	if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
+	    !S_ISLNK(child->i_mode))
+		return 1;
+
 	/* no restrictions if the parent directory is not encrypted */
 	if (!parent->i_sb->s_cop->is_encrypted(parent))
 		return 1;
-- 
cgit v1.2.3


From e1a3a60a2ebe991605acb14cd58e39c0545e174e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 30 Dec 2016 17:42:32 -0600
Subject: net: socket: don't set sk_uid to garbage value in ->setattr()

->setattr() was recently implemented for socket files to sync the socket
inode's uid to the new 'sk_uid' member of struct sock.  It does this by
copying over the ia_uid member of struct iattr.  However, ia_uid is
actually only valid when ATTR_UID is set in ia_valid, indicating that
the uid is being changed, e.g. by chown.  Other metadata operations such
as chmod or utimes leave ia_uid uninitialized.  Therefore, sk_uid could
be set to a "garbage" value from the stack.

Fix this by only copying the uid over when ATTR_UID is set.

Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Tested-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index 8487bf136e5c..a8c2307590b8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -537,7 +537,7 @@ int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	int err = simple_setattr(dentry, iattr);
 
-	if (!err) {
+	if (!err && (iattr->ia_valid & ATTR_UID)) {
 		struct socket *sock = SOCKET_I(d_inode(dentry));
 
 		sock->sk->sk_uid = iattr->ia_uid;
-- 
cgit v1.2.3


From 0c744ea4f77d72b3dcebb7a8f2684633ec79be88 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 1 Jan 2017 14:31:53 -0800
Subject: Linux 4.10-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ec411ba9e40f..5470d599384a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Roaring Lionus
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 086cc1c31a0ec075dac02425367c871bb65bc2c9 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Wed, 14 Dec 2016 21:27:57 +0900
Subject: openrisc: Add _text symbol to fix ksym build error

The build robot reports:

   .tmp_kallsyms1.o: In function `kallsyms_relative_base':
>> (.rodata+0x8a18): undefined reference to `_text'

This is when using 'make alldefconfig'. Adding this _text symbol to mark
the start of the kernel as in other architecture fixes this.

Signed-off-by: Stafford Horne <shorne@gmail.com>
Acked-by: Jonas Bonn <jonas@southpole.se>
---
 arch/openrisc/kernel/vmlinux.lds.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S
index ef31fc24344e..552544616b9d 100644
--- a/arch/openrisc/kernel/vmlinux.lds.S
+++ b/arch/openrisc/kernel/vmlinux.lds.S
@@ -44,6 +44,8 @@ SECTIONS
         /* Read-only sections, merged into text segment: */
         . = LOAD_BASE ;
 
+	_text = .;
+
 	/* _s_kernel_ro must be page aligned */
 	. = ALIGN(PAGE_SIZE);
 	_s_kernel_ro = .;
-- 
cgit v1.2.3


From 4200462d88f47f3759bdf4705f87e207b0f5b2e4 Mon Sep 17 00:00:00 2001
From: Reiter Wolfgang <wr0112358@gmail.com>
Date: Sat, 31 Dec 2016 21:11:57 +0100
Subject: drop_monitor: add missing call to genlmsg_end

Update nlmsg_len field with genlmsg_end to enable userspace processing
using nlmsg_next helper. Also adds error handling.

Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 8e0c0635ee97..f465bad2ef2c 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -75,6 +75,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 	struct nlattr *nla;
 	struct sk_buff *skb;
 	unsigned long flags;
+	void *msg_header;
 
 	al = sizeof(struct net_dm_alert_msg);
 	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
@@ -82,17 +83,31 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 
 	skb = genlmsg_new(al, GFP_KERNEL);
 
-	if (skb) {
-		genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
-				0, NET_DM_CMD_ALERT);
-		nla = nla_reserve(skb, NLA_UNSPEC,
-				  sizeof(struct net_dm_alert_msg));
-		msg = nla_data(nla);
-		memset(msg, 0, al);
-	} else {
-		mod_timer(&data->send_timer, jiffies + HZ / 10);
+	if (!skb)
+		goto err;
+
+	msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
+				 0, NET_DM_CMD_ALERT);
+	if (!msg_header) {
+		nlmsg_free(skb);
+		skb = NULL;
+		goto err;
+	}
+	nla = nla_reserve(skb, NLA_UNSPEC,
+			  sizeof(struct net_dm_alert_msg));
+	if (!nla) {
+		nlmsg_free(skb);
+		skb = NULL;
+		goto err;
 	}
+	msg = nla_data(nla);
+	memset(msg, 0, al);
+	genlmsg_end(skb, msg_header);
+	goto out;
 
+err:
+	mod_timer(&data->send_timer, jiffies + HZ / 10);
+out:
 	spin_lock_irqsave(&data->lock, flags);
 	swap(data->skb, skb);
 	spin_unlock_irqrestore(&data->lock, flags);
-- 
cgit v1.2.3


From 97b84fd6d91766ea57dcc350d78f42639e011c30 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 30 Dec 2016 19:48:19 +0100
Subject: l2tp: consider '::' as wildcard address in l2tp_ip6 socket lookup

An L2TP socket bound to the unspecified address should match with any
address. If not, it can't receive any packet and __l2tp_ip6_bind_lookup()
can't prevent another socket from binding on the same device/tunnel ID.

While there, rename the 'addr' variable to 'sk_laddr' (local addr), to
make following patch clearer.

Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index f092ac441fdd..3135b9d55df5 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -64,7 +64,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 	struct sock *sk;
 
 	sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
-		const struct in6_addr *addr = inet6_rcv_saddr(sk);
+		const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
 		struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
 
 		if (l2tp == NULL)
@@ -72,7 +72,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 
 		if ((l2tp->conn_id == tunnel_id) &&
 		    net_eq(sock_net(sk), net) &&
-		    (!addr || ipv6_addr_equal(addr, laddr)) &&
+		    (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) &&
 		    (!sk->sk_bound_dev_if || !dif ||
 		     sk->sk_bound_dev_if == dif))
 			goto found;
-- 
cgit v1.2.3


From a9b2dff80be979432484afaf7f8d8e73f9e8838a Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 30 Dec 2016 19:48:20 +0100
Subject: l2tp: take remote address into account in l2tp_ip and l2tp_ip6 socket
 lookups

For connected sockets, __l2tp_ip{,6}_bind_lookup() needs to check the
remote IP when looking for a matching socket. Otherwise a connected
socket can receive traffic not originating from its peer.

Drop l2tp_ip_bind_lookup() and l2tp_ip6_bind_lookup() instead of
updating their prototype, as these functions aren't used.

Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c  | 19 ++++++-------------
 net/l2tp/l2tp_ip6.c | 20 ++++++--------------
 2 files changed, 12 insertions(+), 27 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 8938b6ba57a0..3d73278b86ca 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -47,7 +47,8 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
 	return (struct l2tp_ip_sock *)sk;
 }
 
-static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
+					  __be32 raddr, int dif, u32 tunnel_id)
 {
 	struct sock *sk;
 
@@ -61,6 +62,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif
 		if ((l2tp->conn_id == tunnel_id) &&
 		    net_eq(sock_net(sk), net) &&
 		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+		    (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) &&
 		    (!sk->sk_bound_dev_if || !dif ||
 		     sk->sk_bound_dev_if == dif))
 			goto found;
@@ -71,15 +73,6 @@ found:
 	return sk;
 }
 
-static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
-{
-	struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
-	if (sk)
-		sock_hold(sk);
-
-	return sk;
-}
-
 /* When processing receive frames, there are two cases to
  * consider. Data frames consist of a non-zero session-id and an
  * optional cookie. Control frames consist of a regular L2TP header
@@ -183,8 +176,8 @@ pass_up:
 		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
 
 		read_lock_bh(&l2tp_ip_lock);
-		sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb),
-					   tunnel_id);
+		sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
+					   inet_iif(skb), tunnel_id);
 		if (!sk) {
 			read_unlock_bh(&l2tp_ip_lock);
 			goto discard;
@@ -280,7 +273,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		inet->inet_saddr = 0;  /* Use device */
 
 	write_lock_bh(&l2tp_ip_lock);
-	if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
+	if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
 				  sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
 		write_unlock_bh(&l2tp_ip_lock);
 		ret = -EADDRINUSE;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 3135b9d55df5..331ccf5a7bad 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -59,12 +59,14 @@ static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
 
 static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 					   struct in6_addr *laddr,
+					   const struct in6_addr *raddr,
 					   int dif, u32 tunnel_id)
 {
 	struct sock *sk;
 
 	sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
 		const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
+		const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
 		struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
 
 		if (l2tp == NULL)
@@ -73,6 +75,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 		if ((l2tp->conn_id == tunnel_id) &&
 		    net_eq(sock_net(sk), net) &&
 		    (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) &&
+		    (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) &&
 		    (!sk->sk_bound_dev_if || !dif ||
 		     sk->sk_bound_dev_if == dif))
 			goto found;
@@ -83,17 +86,6 @@ found:
 	return sk;
 }
 
-static inline struct sock *l2tp_ip6_bind_lookup(struct net *net,
-						struct in6_addr *laddr,
-						int dif, u32 tunnel_id)
-{
-	struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id);
-	if (sk)
-		sock_hold(sk);
-
-	return sk;
-}
-
 /* When processing receive frames, there are two cases to
  * consider. Data frames consist of a non-zero session-id and an
  * optional cookie. Control frames consist of a regular L2TP header
@@ -197,8 +189,8 @@ pass_up:
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		read_lock_bh(&l2tp_ip6_lock);
-		sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb),
-					    tunnel_id);
+		sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+					    inet6_iif(skb), tunnel_id);
 		if (!sk) {
 			read_unlock_bh(&l2tp_ip6_lock);
 			goto discard;
@@ -330,7 +322,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rcu_read_unlock();
 
 	write_lock_bh(&l2tp_ip6_lock);
-	if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if,
+	if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if,
 				   addr->l2tp_conn_id)) {
 		write_unlock_bh(&l2tp_ip6_lock);
 		err = -EADDRINUSE;
-- 
cgit v1.2.3


From 35f432a03e41d3bf08c51ede917f94e2288fbe8c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 2 Jan 2017 11:19:29 +0100
Subject: mac80211: initialize fast-xmit 'info' later

In ieee80211_xmit_fast(), 'info' is initialized to point to the skb
that's passed in, but that skb may later be replaced by a clone (if
it was shared), leading to an invalid pointer.

This can lead to use-after-free and also later crashes since the
real SKB's info->hw_queue doesn't get initialized properly.

Fix this by assigning info only later, when it's needed, after the
skb replacement (may have) happened.

Cc: stable@vger.kernel.org
Reported-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2c21b7039136..0d8b716e509e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3287,7 +3287,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
 	int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
 	int hw_headroom = sdata->local->hw.extra_tx_headroom;
 	struct ethhdr eth;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info *info;
 	struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result r;
@@ -3351,6 +3351,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
 	memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
 	memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
 
+	info = IEEE80211_SKB_CB(skb);
 	memset(info, 0, sizeof(*info));
 	info->band = fast_tx->band;
 	info->control.vif = &sdata->vif;
-- 
cgit v1.2.3


From f83f90cf7ba68deb09406ea9da80852a64c4db29 Mon Sep 17 00:00:00 2001
From: Alex Wood <thetewood@gmail.com>
Date: Fri, 23 Dec 2016 12:50:13 +0000
Subject: HID: usbhid: Add quirk for the Futaba TOSD-5711BB VFD

The Futaba TOSD-5711BB VFD crashes when the initial HID report is requested,
register the display in hid-ids and tell hid-quirks to not do the init.

Signed-off-by: Alex Wood <thetewood@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h           | 3 +++
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index ec277b96eaa1..62b03d53ecd1 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -365,6 +365,9 @@
 #define USB_VENDOR_ID_FLATFROG		0x25b5
 #define USB_DEVICE_ID_MULTITOUCH_3200	0x0002
 
+#define USB_VENDOR_ID_FUTABA            0x0547
+#define USB_DEVICE_ID_LED_DISPLAY       0x7000
+
 #define USB_VENDOR_ID_ESSENTIAL_REALITY	0x0d7f
 #define USB_DEVICE_ID_ESSENTIAL_REALITY_P5 0x0100
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index b3e01c82af05..6fd49788fbe6 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -88,6 +88,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET },
+	{ USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
-- 
cgit v1.2.3


From 8aa2cc7e747881d1fd52db28261b201d4e3e5565 Mon Sep 17 00:00:00 2001
From: Marcel Hasler <mahasler@gmail.com>
Date: Tue, 20 Dec 2016 22:08:13 +0100
Subject: HID: usbhid: Add quirk for Mayflash/Dragonrise DolphinBar.

The DolphinBar by Mayflash (identified as Dragonrise) needs
HID_QUIRK_MULTI_INPUT to split it up into four input devices. Without this
quirk the adapter is falsely recognized as a tablet. See also bug 115841
(https://bugzilla.kernel.org/show_bug.cgi?id=115841).

Signed-off-by: Marcel Hasler <mahasler@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h           | 1 +
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 62b03d53ecd1..54bd22dc1411 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -319,6 +319,7 @@
 #define USB_VENDOR_ID_DRAGONRISE		0x0079
 #define USB_DEVICE_ID_DRAGONRISE_WIIU		0x1800
 #define USB_DEVICE_ID_DRAGONRISE_PS3		0x1801
+#define USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR	0x1803
 #define USB_DEVICE_ID_DRAGONRISE_GAMECUBE	0x1843
 
 #define USB_VENDOR_ID_DWAV		0x0eef
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 6fd49788fbe6..e9d6cc7cdfc5 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -83,6 +83,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT },
+	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
-- 
cgit v1.2.3


From 143fca77cce906d35f7a60ccef648e888df589f2 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 27 Dec 2016 08:57:59 -0800
Subject: HID: sensor-hub: Move the memset to sensor_hub_get_feature()

While applying patch d443a0aa3a29: "HID: hid-sensor-hub: clear memory to
avoid random data", there was some issues in applying correct version of
the patch. This resulted in the breakage of sensor functions as all
request like power-up will be reset by the memset() in the function
sensor_hub_set_feature().
The reset of caller buffer should be in the function
sensor_hub_get_feature(), not in the sensor_hub_set_feature().

Fixes: d443a0aa3a29 ("HID: hid-sensor-hub: clear memory to avoid random data")
Cc: Stable <stable@vger.kernel.org> # 4.9+
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sensor-hub.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index 5c925228847c..4ef73374a8f9 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -212,7 +212,6 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 	__s32 value;
 	int ret = 0;
 
-	memset(buffer, 0, buffer_size);
 	mutex_lock(&data->mutex);
 	report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT);
 	if (!report || (field_index >= report->maxfield)) {
@@ -256,6 +255,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 	int buffer_index = 0;
 	int i;
 
+	memset(buffer, 0, buffer_size);
+
 	mutex_lock(&data->mutex);
 	report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT);
 	if (!report || (field_index >= report->maxfield) ||
-- 
cgit v1.2.3


From 6c006a9d94bfb5cbcc5150e8fd7f45d3f92f3ee8 Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Sun, 25 Dec 2016 19:01:03 +0530
Subject: clean_bdev_aliases: Prevent cleaning blocks that are not in block
 range

The first block to be cleaned may start at a non-zero page offset. In
such a scenario clean_bdev_aliases() will end up cleaning blocks that
do not fall in the range of blocks to be cleaned. This commit fixes the
issue by skipping blocks that do not fall in valid block range.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Eryu Guan <eguan@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index d21771fcf7d3..0e87401cf335 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1660,7 +1660,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
 			head = page_buffers(page);
 			bh = head;
 			do {
-				if (!buffer_mapped(bh))
+				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
 					goto next;
 				if (bh->b_blocknr >= block + len)
 					break;
-- 
cgit v1.2.3


From f2e0a0b292682dd94274d6793d76656b41526147 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <Bart.VanAssche@sandisk.com>
Date: Mon, 2 Jan 2017 09:46:15 -0700
Subject: block: Make wbt_wait() definition consistent with declaration

Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-wbt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 6e82769f4042..fd28c2806406 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -595,7 +595,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
  * in an irq held spinlock, if it holds one when calling this function.
  * If we do sleep, we'll release and re-grab it.
  */
-unsigned int wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
+enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 {
 	unsigned int ret = 0;
 
-- 
cgit v1.2.3


From 9eca53508a157c6b6fdb6e06796902cf8a920d29 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <Bart.VanAssche@sandisk.com>
Date: Mon, 2 Jan 2017 09:48:47 -0700
Subject: block: Avoid that sparse complains about context imbalance in
 __wbt_wait()

This patch does not change any functionality.

Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-wbt.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index fd28c2806406..f0a9c07b4c7a 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -544,6 +544,8 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
  * the timer to kick off queuing again.
  */
 static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
+	__releases(lock)
+	__acquires(lock)
 {
 	struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
 	DEFINE_WAIT(wait);
@@ -558,13 +560,12 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
 		if (may_queue(rwb, rqw, &wait, rw))
 			break;
 
-		if (lock)
+		if (lock) {
 			spin_unlock_irq(lock);
-
-		io_schedule();
-
-		if (lock)
+			io_schedule();
 			spin_lock_irq(lock);
+		} else
+			io_schedule();
 	} while (1);
 
 	finish_wait(&rqw->wait, &wait);
-- 
cgit v1.2.3


From b4a9eb4cd5966c8aad3d007d206a2cbda97d6928 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 2 Jan 2017 17:43:15 +0100
Subject: parisc: Add line-break when printing segfault info

Add a leading line break else printed line gets too long.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # v4.9
---
 arch/parisc/mm/fault.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 8ff9253930af..1a0b4f63f0e9 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -234,7 +234,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long code,
 	    tsk->comm, code, address);
 	print_vma_addr(KERN_CONT " in ", regs->iaoq[0]);
 
-	pr_cont(" trap #%lu: %s%c", code, trap_name(code),
+	pr_cont("\ntrap #%lu: %s%c", code, trap_name(code),
 		vma ? ',':'\n');
 
 	if (vma)
-- 
cgit v1.2.3


From d0af683407a26a4437d8fa6e283ea201f2ae8146 Mon Sep 17 00:00:00 2001
From: Ian Kumlien <ian.kumlien@gmail.com>
Date: Mon, 2 Jan 2017 09:18:35 +0100
Subject: flow_dissector: Update pptp handling to avoid null pointer deref.

__skb_flow_dissect can be called with a skb or a data packet, either
can be NULL. All calls seems to have been moved to __skb_header_pointer
except the pptp handling which is still calling skb_header_pointer.

skb_header_pointer will use skb->data and thus:
[  109.556866] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080
[  109.557102] IP: [<ffffffff88dc02f8>] __skb_flow_dissect+0xa88/0xce0
[  109.557263] PGD 0
[  109.557338]
[  109.557484] Oops: 0000 [#1] SMP
[  109.557562] Modules linked in: chaoskey
[  109.557783] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.9.0 #79
[  109.557867] Hardware name: Supermicro A1SRM-LN7F/LN5F/A1SRM-LN7F-2758, BIOS 1.0c 11/04/2015
[  109.557957] task: ffff94085c27bc00 task.stack: ffffb745c0068000
[  109.558041] RIP: 0010:[<ffffffff88dc02f8>]  [<ffffffff88dc02f8>] __skb_flow_dissect+0xa88/0xce0
[  109.558203] RSP: 0018:ffff94087fc83d40  EFLAGS: 00010206
[  109.558286] RAX: 0000000000000130 RBX: ffffffff8975bf80 RCX: ffff94084fab6800
[  109.558373] RDX: 0000000000000010 RSI: 000000000000000c RDI: 0000000000000000
[  109.558460] RBP: 0000000000000b88 R08: 0000000000000000 R09: 0000000000000022
[  109.558547] R10: 0000000000000008 R11: ffff94087fc83e04 R12: 0000000000000000
[  109.558763] R13: ffff94084fab6800 R14: ffff94087fc83e04 R15: 000000000000002f
[  109.558979] FS:  0000000000000000(0000) GS:ffff94087fc80000(0000) knlGS:0000000000000000
[  109.559326] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  109.559539] CR2: 0000000000000080 CR3: 0000000281809000 CR4: 00000000001026e0
[  109.559753] Stack:
[  109.559957]  000000000000000c ffff94084fab6822 0000000000000001 ffff94085c2b5fc0
[  109.560578]  0000000000000001 0000000000002000 0000000000000000 0000000000000000
[  109.561200]  0000000000000000 0000000000000000 0000000000000000 0000000000000000
[  109.561820] Call Trace:
[  109.562027]  <IRQ>
[  109.562108]  [<ffffffff88dfb4fa>] ? eth_get_headlen+0x7a/0xf0
[  109.562522]  [<ffffffff88c5a35a>] ? igb_poll+0x96a/0xe80
[  109.562737]  [<ffffffff88dc912b>] ? net_rx_action+0x20b/0x350
[  109.562953]  [<ffffffff88546d68>] ? __do_softirq+0xe8/0x280
[  109.563169]  [<ffffffff8854704a>] ? irq_exit+0xaa/0xb0
[  109.563382]  [<ffffffff8847229b>] ? do_IRQ+0x4b/0xc0
[  109.563597]  [<ffffffff8902d4ff>] ? common_interrupt+0x7f/0x7f
[  109.563810]  <EOI>
[  109.563890]  [<ffffffff88d57530>] ? cpuidle_enter_state+0x130/0x2c0
[  109.564304]  [<ffffffff88d57520>] ? cpuidle_enter_state+0x120/0x2c0
[  109.564520]  [<ffffffff8857eacf>] ? cpu_startup_entry+0x19f/0x1f0
[  109.564737]  [<ffffffff8848d55a>] ? start_secondary+0x12a/0x140
[  109.564950] Code: 83 e2 20 a8 80 0f 84 60 01 00 00 c7 04 24 08 00
00 00 66 85 d2 0f 84 be fe ff ff e9 69 fe ff ff 8b 34 24 89 f2 83 c2
04 66 85 c0 <41> 8b 84 24 80 00 00 00 0f 49 d6 41 8d 31 01 d6 41 2b 84
24 84
[  109.569959] RIP  [<ffffffff88dc02f8>] __skb_flow_dissect+0xa88/0xce0
[  109.570245]  RSP <ffff94087fc83d40>
[  109.570453] CR2: 0000000000000080

Fixes: ab10dccb1160 ("rps: Inspect PPTP encapsulated by GRE to get flow hash")
Signed-off-by: Ian Kumlien <ian.kumlien@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d6447dc10371..fe4e1531976c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -468,8 +468,9 @@ ip_proto_again:
 			if (hdr->flags & GRE_ACK)
 				offset += sizeof(((struct pptp_gre_header *)0)->ack);
 
-			ppp_hdr = skb_header_pointer(skb, nhoff + offset,
-						     sizeof(_ppp_hdr), _ppp_hdr);
+			ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
+						     sizeof(_ppp_hdr),
+						     data, hlen, _ppp_hdr);
 			if (!ppp_hdr)
 				goto out_bad;
 
-- 
cgit v1.2.3


From 7ababb782690e03b78657e27bd051e20163af2d6 Mon Sep 17 00:00:00 2001
From: Michal Tesar <mtesar@redhat.com>
Date: Mon, 2 Jan 2017 14:38:36 +0100
Subject: igmp: Make igmp group member RFC 3376 compliant

5.2. Action on Reception of a Query

 When a system receives a Query, it does not respond immediately.
 Instead, it delays its response by a random amount of time, bounded
 by the Max Resp Time value derived from the Max Resp Code in the
 received Query message.  A system may receive a variety of Queries on
 different interfaces and of different kinds (e.g., General Queries,
 Group-Specific Queries, and Group-and-Source-Specific Queries), each
 of which may require its own delayed response.

 Before scheduling a response to a Query, the system must first
 consider previously scheduled pending responses and in many cases
 schedule a combined response.  Therefore, the system must be able to
 maintain the following state:

 o A timer per interface for scheduling responses to General Queries.

 o A per-group and interface timer for scheduling responses to Group-
   Specific and Group-and-Source-Specific Queries.

 o A per-group and interface list of sources to be reported in the
   response to a Group-and-Source-Specific Query.

 When a new Query with the Router-Alert option arrives on an
 interface, provided the system has state to report, a delay for a
 response is randomly selected in the range (0, [Max Resp Time]) where
 Max Resp Time is derived from Max Resp Code in the received Query
 message.  The following rules are then used to determine if a Report
 needs to be scheduled and the type of Report to schedule.  The rules
 are considered in order and only the first matching rule is applied.

 1. If there is a pending response to a previous General Query
    scheduled sooner than the selected delay, no additional response
    needs to be scheduled.

 2. If the received Query is a General Query, the interface timer is
    used to schedule a response to the General Query after the
    selected delay.  Any previously pending response to a General
    Query is canceled.
--8<--

Currently the timer is rearmed with new random expiration time for
every incoming query regardless of possibly already pending report.
Which is not aligned with the above RFE.
It also might happen that higher rate of incoming queries can
postpone the report after the expiration time of the first query
causing group membership loss.

Now the per interface general query timer is rearmed only
when there is no pending report already scheduled on that interface or
the newly selected expiration time is before the already pending
scheduled report.

Signed-off-by: Michal Tesar <mtesar@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 68d622133f53..5b15459955f8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
 static void igmp_gq_start_timer(struct in_device *in_dev)
 {
 	int tv = prandom_u32() % in_dev->mr_maxdelay;
+	unsigned long exp = jiffies + tv + 2;
+
+	if (in_dev->mr_gq_running &&
+	    time_after_eq(exp, (in_dev->mr_gq_timer).expires))
+		return;
 
 	in_dev->mr_gq_running = 1;
-	if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2))
+	if (!mod_timer(&in_dev->mr_gq_timer, exp))
 		in_dev_hold(in_dev);
 }
 
-- 
cgit v1.2.3


From 4e5da369df64628358e25ffedcf80ac43af3793d Mon Sep 17 00:00:00 2001
From: Alexander Alemayhu <alexander@alemayhu.com>
Date: Mon, 2 Jan 2017 18:52:24 +0100
Subject: Documentation/networking: fix typo in mpls-sysctl

s/utliziation/utilization

Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/mpls-sysctl.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 9ed15f86c17c..15d8d16934fd 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -5,8 +5,8 @@ platform_labels - INTEGER
 	possible to configure forwarding for label values equal to or
 	greater than the number of platform labels.
 
-	A dense utliziation of the entries in the platform label table
-	is possible and expected aas the platform labels are locally
+	A dense utilization of the entries in the platform label table
+	is possible and expected as the platform labels are locally
 	allocated.
 
 	If the number of platform label table entries is set to 0 no
-- 
cgit v1.2.3


From 8f87e626b059f1b82b017f53c5ee91fbc4486e36 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 2 Jan 2017 12:56:02 +0100
Subject: net: stmmac: dwmac-oxnas: fix of-node leak

Use the syscon lookup-by-phandle helper so that the reference taken by
of_parse_phandle() is released when done with the node.

Fixes: 5ed7414062e7 ("net: stmmac: Add OXNAS Glue Driver")
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
index c35597586121..fcc237e0aae1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -109,16 +109,9 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
 {
 	struct plat_stmmacenet_data *plat_dat;
 	struct stmmac_resources stmmac_res;
-	struct device_node *sysctrl;
 	struct oxnas_dwmac *dwmac;
 	int ret;
 
-	sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0);
-	if (!sysctrl) {
-		dev_err(&pdev->dev, "failed to get sys-ctrl node\n");
-		return -EINVAL;
-	}
-
 	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
 	if (ret)
 		return ret;
@@ -134,7 +127,8 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
 	dwmac->dev = &pdev->dev;
 	plat_dat->bsp_priv = dwmac;
 
-	dwmac->regmap = syscon_node_to_regmap(sysctrl);
+	dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+							"oxsemi,sys-ctrl");
 	if (IS_ERR(dwmac->regmap)) {
 		dev_err(&pdev->dev, "failed to have sysctrl regmap\n");
 		return PTR_ERR(dwmac->regmap);
-- 
cgit v1.2.3


From 6b4c212b95ce6a586473a772fb2d28ab22a38f0e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 2 Jan 2017 12:56:03 +0100
Subject: net: stmmac: dwmac-oxnas: fix fixed-link-phydev leaks

Make sure to deregister and free any fixed-link phy registered during
probe on probe errors and on driver unbind by calling the new glue
helper function.

For driver unbind, use the generic stmmac-platform remove implementation
and add an exit callback to disable the clock.

Fixes: 5ed7414062e7 ("net: stmmac: Add OXNAS Glue Driver")
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 41 ++++++++++++++---------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
index fcc237e0aae1..3efd110613df 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -105,6 +105,13 @@ static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac)
 	return 0;
 }
 
+static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv)
+{
+	struct oxnas_dwmac *dwmac = priv;
+
+	clk_disable_unprepare(dwmac->clk);
+}
+
 static int oxnas_dwmac_probe(struct platform_device *pdev)
 {
 	struct plat_stmmacenet_data *plat_dat;
@@ -121,40 +128,44 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac)
-		return -ENOMEM;
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	dwmac->dev = &pdev->dev;
 	plat_dat->bsp_priv = dwmac;
+	plat_dat->exit = oxnas_dwmac_exit;
 
 	dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
 							"oxsemi,sys-ctrl");
 	if (IS_ERR(dwmac->regmap)) {
 		dev_err(&pdev->dev, "failed to have sysctrl regmap\n");
-		return PTR_ERR(dwmac->regmap);
+		ret = PTR_ERR(dwmac->regmap);
+		goto err_remove_config_dt;
 	}
 
 	dwmac->clk = devm_clk_get(&pdev->dev, "gmac");
-	if (IS_ERR(dwmac->clk))
-		return PTR_ERR(dwmac->clk);
+	if (IS_ERR(dwmac->clk)) {
+		ret = PTR_ERR(dwmac->clk);
+		goto err_remove_config_dt;
+	}
 
 	ret = oxnas_dwmac_init(dwmac);
 	if (ret)
-		return ret;
+		goto err_remove_config_dt;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
-		clk_disable_unprepare(dwmac->clk);
+		goto err_dwmac_exit;
 
-	return ret;
-}
 
-static int oxnas_dwmac_remove(struct platform_device *pdev)
-{
-	struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-	int ret = stmmac_dvr_remove(&pdev->dev);
+	return 0;
 
-	clk_disable_unprepare(dwmac->clk);
+err_dwmac_exit:
+	oxnas_dwmac_exit(pdev, plat_dat->bsp_priv);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
@@ -197,7 +208,7 @@ MODULE_DEVICE_TABLE(of, oxnas_dwmac_match);
 
 static struct platform_driver oxnas_dwmac_driver = {
 	.probe  = oxnas_dwmac_probe,
-	.remove = oxnas_dwmac_remove,
+	.remove = stmmac_pltfr_remove,
 	.driver = {
 		.name           = "oxnas-dwmac",
 		.pm		= &oxnas_dwmac_pm_ops,
-- 
cgit v1.2.3


From a8de4d719dfc12bc22192d7daef7c7ae6cfb8b80 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 2 Jan 2017 12:56:04 +0100
Subject: net: stmmac: dwmac-oxnas: use generic pm implementation

Now that we have an exit callback in place, add init as well and get rid
of the custom PM callbacks in favour of the generic ones.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 38 +++--------------------
 1 file changed, 5 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
index 3efd110613df..3dc7d279f805 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -60,8 +60,9 @@ struct oxnas_dwmac {
 	struct regmap	*regmap;
 };
 
-static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac)
+static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
 {
+	struct oxnas_dwmac *dwmac = priv;
 	unsigned int value;
 	int ret;
 
@@ -135,6 +136,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
 
 	dwmac->dev = &pdev->dev;
 	plat_dat->bsp_priv = dwmac;
+	plat_dat->init = oxnas_dwmac_init;
 	plat_dat->exit = oxnas_dwmac_exit;
 
 	dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
@@ -151,7 +153,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
 		goto err_remove_config_dt;
 	}
 
-	ret = oxnas_dwmac_init(dwmac);
+	ret = oxnas_dwmac_init(pdev, plat_dat->bsp_priv);
 	if (ret)
 		goto err_remove_config_dt;
 
@@ -170,36 +172,6 @@ err_remove_config_dt:
 	return ret;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int oxnas_dwmac_suspend(struct device *dev)
-{
-	struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev);
-	int ret;
-
-	ret = stmmac_suspend(dev);
-	clk_disable_unprepare(dwmac->clk);
-
-	return ret;
-}
-
-static int oxnas_dwmac_resume(struct device *dev)
-{
-	struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev);
-	int ret;
-
-	ret = oxnas_dwmac_init(dwmac);
-	if (ret)
-		return ret;
-
-	ret = stmmac_resume(dev);
-
-	return ret;
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops,
-	oxnas_dwmac_suspend, oxnas_dwmac_resume);
-
 static const struct of_device_id oxnas_dwmac_match[] = {
 	{ .compatible = "oxsemi,ox820-dwmac" },
 	{ }
@@ -211,7 +183,7 @@ static struct platform_driver oxnas_dwmac_driver = {
 	.remove = stmmac_pltfr_remove,
 	.driver = {
 		.name           = "oxnas-dwmac",
-		.pm		= &oxnas_dwmac_pm_ops,
+		.pm		= &stmmac_pltfr_pm_ops,
 		.of_match_table = oxnas_dwmac_match,
 	},
 };
-- 
cgit v1.2.3


From 515028fe29d84a15f77d071a13b2d34eb3d137af Mon Sep 17 00:00:00 2001
From: Bartosz Folta <bfolta@cadence.com>
Date: Mon, 2 Jan 2017 12:41:50 +0000
Subject: net: macb: Updated resource allocation function calls to new version
 of API.

Changed function calls of resource allocation to new API. Changed way
of setting DMA mask. Removed unnecessary sanity check.
This patch is sent in regard to recently applied patch
Commit 83a77e9ec4150ee4acc635638f7dedd9da523a26
net: macb: Added PCI wrapper for Platform Driver.

Signed-off-by: Bartosz Folta <bfolta@cadence.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_pci.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index 92be2cd8f817..9906fda76087 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -1,5 +1,5 @@
 /**
- * macb_pci.c - Cadence GEM PCI wrapper.
+ * Cadence GEM PCI wrapper.
  *
  * Copyright (C) 2016 Cadence Design Systems - http://www.cadence.com
  *
@@ -45,32 +45,27 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	struct macb_platform_data plat_data;
 	struct resource res[2];
 
-	/* sanity check */
-	if (!id)
-		return -EINVAL;
-
 	/* enable pci device */
-	err = pci_enable_device(pdev);
+	err = pcim_enable_device(pdev);
 	if (err < 0) {
-		dev_err(&pdev->dev, "Enabling PCI device has failed: 0x%04X",
-			err);
-		return -EACCES;
+		dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err);
+		return err;
 	}
 
 	pci_set_master(pdev);
 
 	/* set up resources */
 	memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res));
-	res[0].start = pdev->resource[0].start;
-	res[0].end = pdev->resource[0].end;
+	res[0].start = pci_resource_start(pdev, 0);
+	res[0].end = pci_resource_end(pdev, 0);
 	res[0].name = PCI_DRIVER_NAME;
 	res[0].flags = IORESOURCE_MEM;
-	res[1].start = pdev->irq;
+	res[1].start = pci_irq_vector(pdev, 0);
 	res[1].name = PCI_DRIVER_NAME;
 	res[1].flags = IORESOURCE_IRQ;
 
-	dev_info(&pdev->dev, "EMAC physical base addr = 0x%p\n",
-		 (void *)(uintptr_t)pci_resource_start(pdev, 0));
+	dev_info(&pdev->dev, "EMAC physical base addr: %pa\n",
+		 &res[0].start);
 
 	/* set up macb platform data */
 	memset(&plat_data, 0, sizeof(plat_data));
@@ -100,7 +95,7 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	plat_info.num_res = ARRAY_SIZE(res);
 	plat_info.data = &plat_data;
 	plat_info.size_data = sizeof(plat_data);
-	plat_info.dma_mask = DMA_BIT_MASK(32);
+	plat_info.dma_mask = pdev->dma_mask;
 
 	/* register platform device */
 	plat_dev = platform_device_register_full(&plat_info);
@@ -120,7 +115,6 @@ err_hclk_register:
 	clk_unregister(plat_data.pclk);
 
 err_pclk_register:
-	pci_disable_device(pdev);
 	return err;
 }
 
@@ -130,7 +124,6 @@ static void macb_remove(struct pci_dev *pdev)
 	struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
 
 	platform_device_unregister(plat_dev);
-	pci_disable_device(pdev);
 	clk_unregister(plat_data->pclk);
 	clk_unregister(plat_data->hclk);
 }
-- 
cgit v1.2.3


From 5350d54f6cd12eaff623e890744c79b700bd3f17 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 2 Jan 2017 13:32:54 -0800
Subject: ipv4: Do not allow MAIN to be alias for new LOCAL w/ custom rules

In the case of custom rules being present we need to handle the case of the
LOCAL table being intialized after the new rule has been added.  To address
that I am adding a new check so that we can make certain we don't use an
alias of MAIN for LOCAL when allocating a new table.

Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse")
Reported-by: Oliver Brunel <jjk@jjacky.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3ff8938893ec..eae0332b0e8c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 	if (tb)
 		return tb;
 
-	if (id == RT_TABLE_LOCAL)
+	if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules)
 		alias = fib_new_table(net, RT_TABLE_MAIN);
 
 	tb = fib_trie_table(id, alias);
-- 
cgit v1.2.3


From 096de2f83ebc8e0404c5b7e847a4abd27b9739da Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 3 Jan 2017 16:26:04 +0100
Subject: benet: stricter vxlan offloading check in be_features_check

When VXLAN offloading is enabled, be_features_check() tries to check if
an encapsulated packet is indeed a VXLAN packet. The check is not strict
enough, and considers any UDP-encapsulated ethernet frame with a 8-byte
tunnel header as being VXLAN. Unfortunately, both GENEVE and VXLAN-GPE
have a 8-byte header, so they get through this check.

Force the UDP destination port to be the one that has been offloaded to
hardware.

Without this, GENEVE-encapsulated packets can end up having an incorrect
checksum when both a GENEVE and a VXLAN (offloaded) tunnel are
configured.

This is similar to commit a547224dceed ("mlx4e: Do not attempt to
offload VXLAN ports that are unrecognized").

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 7e1633bf5a22..225e9a4877d7 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5155,7 +5155,9 @@ static netdev_features_t be_features_check(struct sk_buff *skb,
 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
 	    skb->inner_protocol != htons(ETH_P_TEB) ||
 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
-	    sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
+	    !adapter->vxlan_port ||
+	    udp_hdr(skb)->dest != adapter->vxlan_port)
 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 
 	return features;
-- 
cgit v1.2.3


From 3b48ab2248e61408910e792fe84d6ec466084c1a Mon Sep 17 00:00:00 2001
From: Reiter Wolfgang <wr0112358@gmail.com>
Date: Tue, 3 Jan 2017 01:39:10 +0100
Subject: drop_monitor: consider inserted data in genlmsg_end

Final nlmsg_len field update must reflect inserted net_dm_drop_point
data.

This patch depends on previous patch:
"drop_monitor: add missing call to genlmsg_end"

Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index f465bad2ef2c..fb55327dcfea 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -102,7 +102,6 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 	}
 	msg = nla_data(nla);
 	memset(msg, 0, al);
-	genlmsg_end(skb, msg_header);
 	goto out;
 
 err:
@@ -112,6 +111,13 @@ out:
 	swap(data->skb, skb);
 	spin_unlock_irqrestore(&data->lock, flags);
 
+	if (skb) {
+		struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+		struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh);
+
+		genlmsg_end(skb, genlmsg_data(gnlh));
+	}
+
 	return skb;
 }
 
-- 
cgit v1.2.3


From 9988f4d577f42f43b7612d755477585f35424af7 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 16 Dec 2016 12:59:31 -0800
Subject: latent_entropy: fix ARM build error on earlier gcc

This fixes build errors seen on gcc-4.9.3 or gcc-5.3.1 for an ARM:

arm-soc/init/initramfs.c: In function 'error':
arm-soc/init/initramfs.c:50:1: error: unrecognizable insn:
 }
 ^
(insn 26 25 27 5 (set (reg:SI 111 [ local_entropy.243 ])
        (rotatert:SI (reg:SI 116 [ local_entropy.243 ])
            (const_int -30 [0xffffffffffffffe2]))) -1
     (nil))

Patch from PaX Team <pageexec@freemail.hu>

Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Brad Spengler <spender@grsecurity.net>
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/latent_entropy_plugin.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c
index 12541126575b..8ff203ad4809 100644
--- a/scripts/gcc-plugins/latent_entropy_plugin.c
+++ b/scripts/gcc-plugins/latent_entropy_plugin.c
@@ -328,9 +328,9 @@ static enum tree_code get_op(tree *rhs)
 			op = LROTATE_EXPR;
 			/*
 			 * This code limits the value of random_const to
-			 * the size of a wide int for the rotation
+			 * the size of a long for the rotation
 			 */
-			random_const &= HOST_BITS_PER_WIDE_INT - 1;
+			random_const %= TYPE_PRECISION(long_unsigned_type_node);
 			break;
 		}
 
-- 
cgit v1.2.3


From 81d873a87114b05dbb74d1fbf0c4322ba4bfdee4 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 16 Dec 2016 11:36:06 -0800
Subject: gcc-plugins: update gcc-common.h for gcc-7

This updates gcc-common.h from Emese Revfy for gcc 7. This fixes issues seen
by Kugan and Arnd. Build tested with gcc 5.4 and 7 snapshot.

Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/gcc-common.h | 85 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 950fd2e64bb7..12262c0cc691 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -39,6 +39,9 @@
 #include "hash-map.h"
 #endif
 
+#if BUILDING_GCC_VERSION >= 7000
+#include "memmodel.h"
+#endif
 #include "emit-rtl.h"
 #include "debug.h"
 #include "target.h"
@@ -91,6 +94,9 @@
 #include "tree-ssa-alias.h"
 #include "tree-ssa.h"
 #include "stringpool.h"
+#if BUILDING_GCC_VERSION >= 7000
+#include "tree-vrp.h"
+#endif
 #include "tree-ssanames.h"
 #include "print-tree.h"
 #include "tree-eh.h"
@@ -287,6 +293,22 @@ static inline struct cgraph_node *cgraph_next_function_with_gimple_body(struct c
 	return NULL;
 }
 
+static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable)
+{
+	cgraph_node_ptr alias;
+
+	if (callback(node, data))
+		return true;
+
+	for (alias = node->same_body; alias; alias = alias->next) {
+		if (include_overwritable || cgraph_function_body_availability(alias) > AVAIL_OVERWRITABLE)
+			if (cgraph_for_node_and_aliases(alias, callback, data, include_overwritable))
+				return true;
+	}
+
+	return false;
+}
+
 #define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \
 	for ((node) = cgraph_first_function_with_gimple_body(); (node); \
 		(node) = cgraph_next_function_with_gimple_body(node))
@@ -399,6 +421,7 @@ typedef union gimple_statement_d gassign;
 typedef union gimple_statement_d gcall;
 typedef union gimple_statement_d gcond;
 typedef union gimple_statement_d gdebug;
+typedef union gimple_statement_d ggoto;
 typedef union gimple_statement_d gphi;
 typedef union gimple_statement_d greturn;
 
@@ -452,6 +475,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt)
 	return stmt;
 }
 
+static inline ggoto *as_a_ggoto(gimple stmt)
+{
+	return stmt;
+}
+
+static inline const ggoto *as_a_const_ggoto(const_gimple stmt)
+{
+	return stmt;
+}
+
 static inline gphi *as_a_gphi(gimple stmt)
 {
 	return stmt;
@@ -496,6 +529,14 @@ static inline const greturn *as_a_const_greturn(const_gimple stmt)
 
 typedef struct rtx_def rtx_insn;
 
+static inline const char *get_decl_section_name(const_tree decl)
+{
+	if (DECL_SECTION_NAME(decl) == NULL_TREE)
+		return NULL;
+
+	return TREE_STRING_POINTER(DECL_SECTION_NAME(decl));
+}
+
 static inline void set_decl_section_name(tree node, const char *value)
 {
 	if (value)
@@ -511,6 +552,7 @@ typedef struct gimple_statement_base gassign;
 typedef struct gimple_statement_call gcall;
 typedef struct gimple_statement_base gcond;
 typedef struct gimple_statement_base gdebug;
+typedef struct gimple_statement_base ggoto;
 typedef struct gimple_statement_phi gphi;
 typedef struct gimple_statement_base greturn;
 
@@ -564,6 +606,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt)
 	return stmt;
 }
 
+static inline ggoto *as_a_ggoto(gimple stmt)
+{
+	return stmt;
+}
+
+static inline const ggoto *as_a_const_ggoto(const_gimple stmt)
+{
+	return stmt;
+}
+
 static inline gphi *as_a_gphi(gimple stmt)
 {
 	return as_a<gphi>(stmt);
@@ -611,6 +663,11 @@ inline bool is_a_helper<const gassign *>::test(const_gimple gs)
 
 #define INSN_DELETED_P(insn) (insn)->deleted()
 
+static inline const char *get_decl_section_name(const_tree decl)
+{
+	return DECL_SECTION_NAME(decl);
+}
+
 /* symtab/cgraph related */
 #define debug_cgraph_node(node) (node)->debug()
 #define cgraph_get_node(decl) cgraph_node::get(decl)
@@ -619,6 +676,7 @@ inline bool is_a_helper<const gassign *>::test(const_gimple gs)
 #define cgraph_n_nodes symtab->cgraph_count
 #define cgraph_max_uid symtab->cgraph_max_uid
 #define varpool_get_node(decl) varpool_node::get(decl)
+#define dump_varpool_node(file, node) (node)->dump(file)
 
 #define cgraph_create_edge(caller, callee, call_stmt, count, freq, nest) \
 	(caller)->create_edge((callee), (call_stmt), (count), (freq))
@@ -674,6 +732,11 @@ static inline cgraph_node_ptr cgraph_alias_target(cgraph_node_ptr node)
 	return node->get_alias_target();
 }
 
+static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable)
+{
+	return node->call_for_symbol_thunks_and_aliases(callback, data, include_overwritable);
+}
+
 static inline struct cgraph_node_hook_list *cgraph_add_function_insertion_hook(cgraph_node_hook hook, void *data)
 {
 	return symtab->add_cgraph_insertion_hook(hook, data);
@@ -729,6 +792,13 @@ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree l
 	return gimple_build_assign(lhs, subcode, op1, op2 PASS_MEM_STAT);
 }
 
+template <>
+template <>
+inline bool is_a_helper<const ggoto *>::test(const_gimple gs)
+{
+	return gs->code == GIMPLE_GOTO;
+}
+
 template <>
 template <>
 inline bool is_a_helper<const greturn *>::test(const_gimple gs)
@@ -766,6 +836,16 @@ static inline const gcall *as_a_const_gcall(const_gimple stmt)
 	return as_a<const gcall *>(stmt);
 }
 
+static inline ggoto *as_a_ggoto(gimple stmt)
+{
+	return as_a<ggoto *>(stmt);
+}
+
+static inline const ggoto *as_a_const_ggoto(const_gimple stmt)
+{
+	return as_a<const ggoto *>(stmt);
+}
+
 static inline gphi *as_a_gphi(gimple stmt)
 {
 	return as_a<gphi *>(stmt);
@@ -828,4 +908,9 @@ static inline void debug_gimple_stmt(const_gimple s)
 #define debug_gimple_stmt(s) debug_gimple_stmt(CONST_CAST_GIMPLE(s))
 #endif
 
+#if BUILDING_GCC_VERSION >= 7000
+#define get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep, keep_aligning)	\
+	get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep)
+#endif
+
 #endif
-- 
cgit v1.2.3


From 7158339d4c1ede786c48fa5c062fa68df366ba94 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Tue, 3 Jan 2017 17:51:33 -0500
Subject: block: fix up io_poll documentation

/sys/block/<dev>/queue/io_poll is a boolean.  Fix the docs.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/queue-sysfs.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 51642159aedb..c0a3bb5a6e4e 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -54,9 +54,9 @@ This is the hardware sector size of the device, in bytes.
 
 io_poll (RW)
 ------------
-When read, this file shows the total number of block IO polls and how
-many returned success.  Writing '0' to this file will disable polling
-for this device.  Writing any non-zero value will enable this feature.
+When read, this file shows whether polling is enabled (1) or disabled
+(0).  Writing '0' to this file will disable polling for this device.
+Writing any non-zero value will enable this feature.
 
 io_poll_delay (RW)
 ------------------
-- 
cgit v1.2.3


From 926d93a33e59b2729afdbad357233c17184de9d2 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 3 Jan 2017 09:37:55 -0800
Subject: net: vrf: Add missing Rx counters

The move from rx-handler to L3 receive handler inadvertantly dropped the
rx counters. Restore them.

Fixes: 74b20582ac38 ("net: l3mdev: Add hook in ip and ipv6")
Reported-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 7532646c3b7b..23dfb0eac098 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -967,6 +967,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
 	 */
 	need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
 	if (!ipv6_ndisc_frame(skb) && !need_strict) {
+		vrf_rx_stats(vrf_dev, skb->len);
 		skb->dev = vrf_dev;
 		skb->skb_iif = vrf_dev->ifindex;
 
@@ -1011,6 +1012,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
 		goto out;
 	}
 
+	vrf_rx_stats(vrf_dev, skb->len);
+
 	skb_push(skb, skb->mac_len);
 	dev_queue_xmit_nit(skb, vrf_dev);
 	skb_pull(skb, skb->mac_len);
-- 
cgit v1.2.3


From 7a21272b088894070391a94fdd1c67014020fa1d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 3 Jan 2017 18:39:32 -0800
Subject: xfs: fix double-cleanup when CUI recovery fails

Dan Carpenter reported a double-free of rcur if _defer_finish fails
while we're recovering CUI items.  Fix the error recovery to prevent
this.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_refcount_item.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fe86a668a57e..6e4c7446c3d4 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -526,13 +526,14 @@ xfs_cui_recover(
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
 	error = xfs_defer_finish(&tp, &dfops, NULL);
 	if (error)
-		goto abort_error;
+		goto abort_defer;
 	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
 	error = xfs_trans_commit(tp);
 	return error;
 
 abort_error:
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
+abort_defer:
 	xfs_defer_cancel(&dfops);
 	xfs_trans_cancel(tp);
 	return error;
-- 
cgit v1.2.3


From 20e73b000bcded44a91b79429d8fa743247602ad Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 3 Jan 2017 18:39:33 -0800
Subject: xfs: use the actual AG length when reserving blocks

We need to use the actual AG length when making per-AG reservations,
since we could otherwise end up reserving more blocks out of the last
AG than there are actual blocks.

Complained-about-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_ag_resv.c        |  3 +++
 fs/xfs/libxfs/xfs_refcount_btree.c |  9 ++++++---
 fs/xfs/libxfs/xfs_refcount_btree.h |  3 ++-
 fs/xfs/libxfs/xfs_rmap_btree.c     | 14 +++++++-------
 fs/xfs/libxfs/xfs_rmap_btree.h     |  3 ++-
 fs/xfs/xfs_fsops.c                 | 14 ++++++++++++++
 6 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e5ebc3770460..d346d42c54d1 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -256,6 +256,9 @@ xfs_ag_resv_init(
 			goto out;
 	}
 
+	ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
+	       xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
+	       pag->pagf_freeblks + pag->pagf_flcount);
 out:
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 6fb2215f8ff7..50add5272807 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -409,13 +409,14 @@ xfs_refcountbt_calc_size(
  */
 xfs_extlen_t
 xfs_refcountbt_max_size(
-	struct xfs_mount	*mp)
+	struct xfs_mount	*mp,
+	xfs_agblock_t		agblocks)
 {
 	/* Bail out if we're uninitialized, which can happen in mkfs. */
 	if (mp->m_refc_mxr[0] == 0)
 		return 0;
 
-	return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks);
+	return xfs_refcountbt_calc_size(mp, agblocks);
 }
 
 /*
@@ -430,22 +431,24 @@ xfs_refcountbt_calc_reserves(
 {
 	struct xfs_buf		*agbp;
 	struct xfs_agf		*agf;
+	xfs_agblock_t		agblocks;
 	xfs_extlen_t		tree_len;
 	int			error;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return 0;
 
-	*ask += xfs_refcountbt_max_size(mp);
 
 	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
 	if (error)
 		return error;
 
 	agf = XFS_BUF_TO_AGF(agbp);
+	agblocks = be32_to_cpu(agf->agf_length);
 	tree_len = be32_to_cpu(agf->agf_refcount_blocks);
 	xfs_buf_relse(agbp);
 
+	*ask += xfs_refcountbt_max_size(mp, agblocks);
 	*used += tree_len;
 
 	return error;
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index 3be7768bd51a..9db008b955b7 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
 
 extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp,
 		unsigned long long len);
-extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp);
+extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp,
+		xfs_agblock_t agblocks);
 
 extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp,
 		xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index de25771764ba..74e5a54bc428 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -550,13 +550,14 @@ xfs_rmapbt_calc_size(
  */
 xfs_extlen_t
 xfs_rmapbt_max_size(
-	struct xfs_mount	*mp)
+	struct xfs_mount	*mp,
+	xfs_agblock_t		agblocks)
 {
 	/* Bail out if we're uninitialized, which can happen in mkfs. */
 	if (mp->m_rmap_mxr[0] == 0)
 		return 0;
 
-	return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks);
+	return xfs_rmapbt_calc_size(mp, agblocks);
 }
 
 /*
@@ -571,25 +572,24 @@ xfs_rmapbt_calc_reserves(
 {
 	struct xfs_buf		*agbp;
 	struct xfs_agf		*agf;
-	xfs_extlen_t		pool_len;
+	xfs_agblock_t		agblocks;
 	xfs_extlen_t		tree_len;
 	int			error;
 
 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
 		return 0;
 
-	/* Reserve 1% of the AG or enough for 1 block per record. */
-	pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp));
-	*ask += pool_len;
-
 	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
 	if (error)
 		return error;
 
 	agf = XFS_BUF_TO_AGF(agbp);
+	agblocks = be32_to_cpu(agf->agf_length);
 	tree_len = be32_to_cpu(agf->agf_rmap_blocks);
 	xfs_buf_relse(agbp);
 
+	/* Reserve 1% of the AG or enough for 1 block per record. */
+	*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
 	*used += tree_len;
 
 	return error;
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 2a9ac472fb15..19c08e933049 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
 
 extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
 		unsigned long long len);
-extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp);
+extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp,
+		xfs_agblock_t agblocks);
 
 extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp,
 		xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 93d12fa2670d..242e8091296d 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -631,6 +631,20 @@ xfs_growfs_data_private(
 	xfs_set_low_space_thresholds(mp);
 	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
 
+	/*
+	 * If we expanded the last AG, free the per-AG reservation
+	 * so we can reinitialize it with the new size.
+	 */
+	if (new) {
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, agno);
+		error = xfs_ag_resv_free(pag);
+		xfs_perag_put(pag);
+		if (error)
+			goto out;
+	}
+
 	/* Reserve AG metadata blocks. */
 	error = xfs_fs_reserve_ag_blocks(mp);
 	if (error && error != -ENOSPC)
-- 
cgit v1.2.3


From a1b7a4dea6166cf46be895bce4aac67ea5160fe8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Jan 2017 18:39:33 -0800
Subject: xfs: fix crash and data corruption due to removal of busy COW extents

There is a race window between write_cache_pages calling
clear_page_dirty_for_io and XFS calling set_page_writeback, in which
the mapping for an inode is tagged neither as dirty, nor as writeback.

If the COW shrinker hits in exactly that window we'll remove the delayed
COW extents and writepages trying to write it back, which in release
kernels will manifest as corruption of the bmap btree, and in debug
kernels will trip the ASSERT about now calling xfs_bmapi_write with the
COWFORK flag for holes.  A complex customer load manages to hit this
window fairly reliably, probably by always having COW writeback in flight
while the cow shrinker runs.

This patch adds another check for having the I_DIRTY_PAGES flag set,
which is still set during this race window.  While this fixes the problem
I'm still not overly happy about the way the COW shrinker works as it
still seems a bit fragile.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_icache.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index ff4d6311c7f4..70ca4f608321 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1597,7 +1597,8 @@ xfs_inode_free_cowblocks(
 	 * If the mapping is dirty or under writeback we cannot touch the
 	 * CoW fork.  Leave it alone if we're in the midst of a directio.
 	 */
-	if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
+	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
 	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
 	    atomic_read(&VFS_I(ip)->i_dio_count))
 		return 0;
-- 
cgit v1.2.3


From 721a0edfbe1f302b93274ce75e0d62843ca63e0d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 3 Jan 2017 18:39:34 -0800
Subject: xfs: update MAINTAINERS

I am taking over as XFS maintainer from Dave Chinner[1], so update
contact information and git tree pointers.

[1] http://lkml.iu.edu/hypermail/linux/kernel/1612.1/04390.html

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cfff2c9e3d94..793a016d9e4d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13527,11 +13527,11 @@ F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
 
 XFS FILESYSTEM
-M:	Dave Chinner <david@fromorbit.com>
+M:	Darrick J. Wong <darrick.wong@oracle.com>
 M:	linux-xfs@vger.kernel.org
 L:	linux-xfs@vger.kernel.org
 W:	http://xfs.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git
+T:	git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 S:	Supported
 F:	Documentation/filesystems/xfs.txt
 F:	fs/xfs/
-- 
cgit v1.2.3


From ff97f2399edac1e0fb3fa7851d5fbcbdf04717cf Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Tue, 3 Jan 2017 20:34:17 -0800
Subject: xfs: fix max_retries _show and _store functions

max_retries _show and _store functions should test against cfg->max_retries,
not cfg->retry_timeout

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 276d3023d60f..de6195e38910 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -396,7 +396,7 @@ max_retries_show(
 	int		retries;
 	struct xfs_error_cfg *cfg = to_error_cfg(kobject);
 
-	if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER)
+	if (cfg->max_retries == XFS_ERR_RETRY_FOREVER)
 		retries = -1;
 	else
 		retries = cfg->max_retries;
@@ -422,7 +422,7 @@ max_retries_store(
 		return -EINVAL;
 
 	if (val == -1)
-		cfg->retry_timeout = XFS_ERR_RETRY_FOREVER;
+		cfg->max_retries = XFS_ERR_RETRY_FOREVER;
 	else
 		cfg->max_retries = val;
 	return count;
-- 
cgit v1.2.3


From 04f6152d9fbad5bb78bccd05e798fa2d66c571e9 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Wed, 4 Jan 2017 12:58:44 +0100
Subject: MAINTAINERS: add myself as maintainer of fbdev

I would like to help with fbdev maintenance.  I can dedicate some time
for reviewing and handling patches but won't have time for much more.

The subsystem will remain in maintenance mode (no new drivers will be
added to it).

Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 MAINTAINERS | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cfff2c9e3d94..2775b14f861a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5080,9 +5080,11 @@ F:	drivers/net/wan/dlci.c
 F:	drivers/net/wan/sdla.c
 
 FRAMEBUFFER LAYER
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-fbdev@vger.kernel.org
+T:	git git://github.com/bzolnier/linux.git
 Q:	http://patchwork.kernel.org/project/linux-fbdev/list/
-S:	Orphan
+S:	Maintained
 F:	Documentation/fb/
 F:	drivers/video/
 F:	include/video/
-- 
cgit v1.2.3


From 4dcd19bfabaee8f9f4bcf203afba09b98ccbaf76 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 13 Dec 2016 13:50:52 +0530
Subject: video: fbdev: cobalt_lcdfb: Handle return NULL error from
 devm_ioremap

Here, If devm_ioremap will fail. It will return NULL.
Kernel can run into a NULL-pointer dereference.
This error check will avoid NULL pointer dereference.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Yoichi Yuasa <yuasa@linux-mips.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/cobalt_lcdfb.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c
index 2d3b691f3fc4..038ac6934fe9 100644
--- a/drivers/video/fbdev/cobalt_lcdfb.c
+++ b/drivers/video/fbdev/cobalt_lcdfb.c
@@ -308,6 +308,11 @@ static int cobalt_lcdfb_probe(struct platform_device *dev)
 	info->screen_size = resource_size(res);
 	info->screen_base = devm_ioremap(&dev->dev, res->start,
 					 info->screen_size);
+	if (!info->screen_base) {
+		framebuffer_release(info);
+		return -ENOMEM;
+	}
+
 	info->fbops = &cobalt_lcd_fbops;
 	info->fix = cobalt_lcdfb_fix;
 	info->fix.smem_start = res->start;
-- 
cgit v1.2.3


From 63dfb0dac9055145db85ce764355aef2f563739a Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Tue, 3 Jan 2017 17:22:20 +0800
Subject: net: usb: asix_devices: add .reset_resume for USB PM

The USB core may call reset_resume when it fails to resume asix device.
And USB core can recovery this abnormal resume at low level driver,
the same .resume at asix driver can work too. Add .reset_resume can
avoid disconnecting after backing from system resume, and NFS can
still be mounted after this commit.

Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 6c646e228833..6e98ede997d3 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -1367,6 +1367,7 @@ static struct usb_driver asix_driver = {
 	.probe =	usbnet_probe,
 	.suspend =	asix_suspend,
 	.resume =	asix_resume,
+	.reset_resume =	asix_resume,
 	.disconnect =	usbnet_disconnect,
 	.supports_autosuspend = 1,
 	.disable_hub_initiated_lpm = 1,
-- 
cgit v1.2.3


From a9a8cdb368d99bb655b5cdabea560446db0527cc Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Tue, 3 Jan 2017 21:25:48 +0530
Subject: libcxgb: fix error check for ip6_route_output()

ip6_route_output() never returns NULL so
check dst->error instead of !dst.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
index 0f0de5b63622..d04a6c163445 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
@@ -133,17 +133,15 @@ cxgb_find_route6(struct cxgb4_lld_info *lldi,
 		if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
 			fl6.flowi6_oif = sin6_scope_id;
 		dst = ip6_route_output(&init_net, NULL, &fl6);
-		if (!dst)
-			goto out;
-		if (!cxgb_our_interface(lldi, get_real_dev,
-					ip6_dst_idev(dst)->dev) &&
-		    !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
+		if (dst->error ||
+		    (!cxgb_our_interface(lldi, get_real_dev,
+					 ip6_dst_idev(dst)->dev) &&
+		     !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK))) {
 			dst_release(dst);
-			dst = NULL;
+			return NULL;
 		}
 	}
 
-out:
 	return dst;
 }
 EXPORT_SYMBOL(cxgb_find_route6);
-- 
cgit v1.2.3


From cd7aeb1f9706b665ad8659df8ff036e7bc0097f4 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 3 Jan 2017 13:57:00 -0500
Subject: LiquidIO VF: s/select/imply/ for PTP_1588_CLOCK

Fix a minor fallout from the merge of the timers and the networking
trees. The following error may result if the PTP_1588_CLOCK
prerequisites are not available:

drivers/built-in.o: In function `ptp_clock_unregister':
(.text+0x40e0a5): undefined reference to `pps_unregister_source'
drivers/built-in.o: In function `ptp_clock_unregister':
(.text+0x40e0cc): undefined reference to `posix_clock_unregister'
drivers/built-in.o: In function `ptp_clock_event':
(.text+0x40e249): undefined reference to `pps_event'
drivers/built-in.o: In function `ptp_clock_register':
(.text+0x40e5e1): undefined reference to `pps_register_source'
drivers/built-in.o: In function `ptp_clock_register':
(.text+0x40e62c): undefined reference to `posix_clock_register'
drivers/built-in.o: In function `ptp_clock_register':
(.text+0x40e68d): undefined reference to `pps_unregister_source'

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index bbc8bd16cb97..dcbce6cac63e 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -77,7 +77,7 @@ config OCTEON_MGMT_ETHERNET
 config LIQUIDIO_VF
 	tristate "Cavium LiquidIO VF support"
 	depends on 64BIT && PCI_MSI
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Cavium LiquidIO Intelligent Server Adapter
 	  based on CN23XX chips.
-- 
cgit v1.2.3


From bb7da333d0a9f3bddc08f84187b7579a3f68fd24 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 3 Jan 2017 16:34:48 -0800
Subject: net: systemport: Utilize skb_put_padto()

Since we need to pad our packets, utilize skb_put_padto() which
increases skb->len by how much we need to pad, allowing us to eliminate
the test on skb->len right below.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 25d1eb4933d0..e67908b5edfe 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1028,13 +1028,12 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
 	 * (including FCS and tag) because the length verification is done after
 	 * the Broadcom tag is stripped off the ingress packet.
 	 */
-	if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
+	if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
 		ret = NETDEV_TX_OK;
 		goto out;
 	}
 
-	skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ?
-			ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len;
+	skb_len = skb->len;
 
 	mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE);
 	if (dma_mapping_error(kdev, mapping)) {
-- 
cgit v1.2.3


From 38e5a85562a6cd911fc26d951d576551a688574c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 3 Jan 2017 16:34:49 -0800
Subject: net: systemport: Pad packet before inserting TSB

Inserting the TSB means adding an extra 8 bytes in front the of packet
that is going to be used as metadata information by the TDMA engine, but
stripped off, so it does not really help with the packet padding.

For some odd packet sizes that fall below the 60 bytes payload (e.g: ARP)
we can end-up padding them after the TSB insertion, thus making them 64
bytes, but with the TDMA stripping off the first 8 bytes, they could
still be smaller than 64 bytes which is required to ingress the switch.

Fix this by swapping the padding and TSB insertion, guaranteeing that
the packets have the right sizes.

Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index e67908b5edfe..7e8cf213fd81 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1012,15 +1012,6 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
 		goto out;
 	}
 
-	/* Insert TSB and checksum infos */
-	if (priv->tsb_en) {
-		skb = bcm_sysport_insert_tsb(skb, dev);
-		if (!skb) {
-			ret = NETDEV_TX_OK;
-			goto out;
-		}
-	}
-
 	/* The Ethernet switch we are interfaced with needs packets to be at
 	 * least 64 bytes (including FCS) otherwise they will be discarded when
 	 * they enter the switch port logic. When Broadcom tags are enabled, we
@@ -1033,6 +1024,15 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
 		goto out;
 	}
 
+	/* Insert TSB and checksum infos */
+	if (priv->tsb_en) {
+		skb = bcm_sysport_insert_tsb(skb, dev);
+		if (!skb) {
+			ret = NETDEV_TX_OK;
+			goto out;
+		}
+	}
+
 	skb_len = skb->len;
 
 	mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE);
-- 
cgit v1.2.3


From 3fe61f0940d9c7892462c893602fdccfe8b24e8c Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Wed, 4 Jan 2017 13:21:29 +0200
Subject: dpaa_eth: cleanup after init_phy() failure

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 624ba9058dc4..77517aa3e8d9 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2291,7 +2291,8 @@ static int dpaa_open(struct net_device *net_dev)
 	net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev);
 	if (!net_dev->phydev) {
 		netif_err(priv, ifup, net_dev, "init_phy() failed\n");
-		return -ENODEV;
+		err = -ENODEV;
+		goto phy_init_failed;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
@@ -2314,6 +2315,7 @@ mac_start_failed:
 	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++)
 		fman_port_disable(mac_dev->port[i]);
 
+phy_init_failed:
 	dpaa_eth_napi_disable(priv);
 
 	return err;
-- 
cgit v1.2.3


From 0fbb0f24dde8759925fc56e9dbc6a5b2cbba99c4 Mon Sep 17 00:00:00 2001
From: Roy Pledge <roy.pledge@nxp.com>
Date: Wed, 4 Jan 2017 13:21:30 +0200
Subject: dpaa_eth: Initialize CGR structure before init

The QBMan CGR options needs to be zeroed before calling the init
function

Signed-off-by: Roy Pledge <roy.pledge@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 77517aa3e8d9..c9b7ad65e563 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -733,6 +733,7 @@ static int dpaa_eth_cgr_init(struct dpaa_priv *priv)
 	priv->cgr_data.cgr.cb = dpaa_eth_cgscn;
 
 	/* Enable Congestion State Change Notifications and CS taildrop */
+	memset(&initcgr, 0, sizeof(initcgr));
 	initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES);
 	initcgr.cgr.cscn_en = QM_CGR_EN;
 
@@ -2422,6 +2423,7 @@ static int dpaa_ingress_cgr_init(struct dpaa_priv *priv)
 	}
 
 	/* Enable CS TD, but disable Congestion State Change Notifications. */
+	memset(&initcgr, 0, sizeof(initcgr));
 	initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CS_THRES);
 	initcgr.cgr.cscn_en = QM_CGR_EN;
 	cs_th = DPAA_INGRESS_CS_THRESHOLD;
-- 
cgit v1.2.3


From 4fdda95893de776a8efdf661bbf0e338f2f13dcb Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 4 Jan 2017 15:10:56 +0000
Subject: sfc: don't report RX hash keys to ethtool when RSS wasn't enabled

If we failed to set up RSS on EF10 (e.g. because firmware declared
 RX_RSS_LIMITED), ethtool --show-nfc $dev rx-flow-hash ... should report
 no fields, rather than confusingly reporting what fields we _would_ be
 hashing on if RSS was working.

Fixes: dcb4123cbec0 ("sfc: disable RSS when unsupported")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 3 ++-
 drivers/net/ethernet/sfc/ethtool.c    | 2 ++
 drivers/net/ethernet/sfc/net_driver.h | 2 ++
 drivers/net/ethernet/sfc/siena.c      | 1 +
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index de2947ccc5ad..5eb0e684fd76 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1323,7 +1323,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
 	}
 
 	/* don't fail init if RSS setup doesn't work */
-	efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table);
+	rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table);
+	efx->rss_active = (rc == 0);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 87bdc56b4e3a..18ebaea44e82 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -975,6 +975,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 
 	case ETHTOOL_GRXFH: {
 		info->data = 0;
+		if (!efx->rss_active) /* No RSS */
+			return 0;
 		switch (info->flow_type) {
 		case UDP_V4_FLOW:
 			if (efx->rx_hash_udp_4tuple)
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 1a635ced62d0..1c62c1a00fca 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -860,6 +860,7 @@ struct vfdi_status;
  * @rx_hash_key: Toeplitz hash key for RSS
  * @rx_indir_table: Indirection table for RSS
  * @rx_scatter: Scatter mode enabled for receives
+ * @rss_active: RSS enabled on hardware
  * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
@@ -998,6 +999,7 @@ struct efx_nic {
 	u8 rx_hash_key[40];
 	u32 rx_indir_table[128];
 	bool rx_scatter;
+	bool rss_active;
 	bool rx_hash_udp_4tuple;
 
 	unsigned int_error_count;
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index a3901bc96586..4e54e5dc9fcb 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -403,6 +403,7 @@ static int siena_init_nic(struct efx_nic *efx)
 	efx_writeo(efx, &temp, FR_AZ_RX_CFG);
 
 	siena_rx_push_rss_config(efx, false, efx->rx_indir_table);
+	efx->rss_active = true;
 
 	/* Enable event logging */
 	rc = efx_mcdi_log_ctrl(efx, true, false, 0);
-- 
cgit v1.2.3


From 71eae1ca77fd6be218d8a952d97bba827e56516d Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Wed, 4 Jan 2017 23:10:23 +0300
Subject: sh_eth: enable RX descriptor word 0 shift on SH7734

The RX descriptor word 0 on SH7734 has the RFS[9:0] field in bits 16-25
(bits  0-15 usually used for that are occupied by the packet checksum).
Thus  we need to set the 'shift_rd0'  field in the SH7734 SoC data...

Fixes: f0e81fecd4f8 ("net: sh_eth: Add support SH7734")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 0af7fc279c85..00fafabab1d0 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -819,6 +819,7 @@ static struct sh_eth_cpu_data sh7734_data = {
 	.tsu		= 1,
 	.hw_crc		= 1,
 	.select_mii	= 1,
+	.shift_rd0	= 1,
 };
 
 /* SH7763 */
-- 
cgit v1.2.3