From 8759fec4af222f338d08f8f1a7ad6a77ca6cb301 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Wed, 14 Dec 2016 15:15:07 +0100 Subject: crypto: marvell - Copy IVDIG before launching partial DMA ahash requests Currently, inner IV/DIGEST data are only copied once into the hash engines and not set explicitly before launching a request that is not a first frag. This is an issue especially when multiple ahash reqs are computed in parallel or chained with cipher request, as the state of the request being computed is not updated into the hash engine. It leads to non-deterministic corrupted digest results. Fixes: commit 2786cee8e50b ("crypto: marvell - Move SRAM I/O operations to step functions") Signed-off-by: Romain Perier Acked-by: Boris Brezillon Cc: Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.h | 3 ++- drivers/crypto/marvell/hash.c | 34 +++++++++++++++++++++++++++++++++- drivers/crypto/marvell/tdma.c | 9 ++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index a768da7138a1..b7872f62f674 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -273,7 +273,8 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_SRC_IN_SRAM BIT(30) #define CESA_TDMA_END_OF_REQ BIT(29) #define CESA_TDMA_BREAK_CHAIN BIT(28) -#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) +#define CESA_TDMA_SET_STATE BIT(27) +#define CESA_TDMA_TYPE_MSK GENMASK(26, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 2a9260559654..585c90f9f606 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -281,13 +281,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req) sreq->offset = 0; } +static void mv_cesa_ahash_dma_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_req *base = &creq->base; + + /* 
We must explicitly set the digest state. */ + if (base->chain.first->flags & CESA_TDMA_SET_STATE) { + struct mv_cesa_engine *engine = base->engine; + int i; + + /* Set the hash state in the IVDIG regs. */ + for (i = 0; i < ARRAY_SIZE(creq->state); i++) + writel_relaxed(creq->state[i], engine->regs + + CESA_IVDIG(i)); + } + + mv_cesa_dma_step(base); +} + static void mv_cesa_ahash_step(struct crypto_async_request *req) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->base); + mv_cesa_ahash_dma_step(ahashreq); else mv_cesa_ahash_std_step(ahashreq); } @@ -585,12 +604,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; + bool set_state = false; int ret; u32 type; basereq->chain.first = NULL; basereq->chain.last = NULL; + if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl)) + set_state = true; + if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); @@ -684,6 +707,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (type != CESA_TDMA_RESULT) basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN; + if (set_state) { + /* + * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to + * let the step logic know that the IVDIG registers should be + * explicitly set before launching a TDMA chain. 
+ */ + basereq->chain.first->flags |= CESA_TDMA_SET_STATE; + } + return 0; err_free_tdma: diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 4416b88eca70..c76375ff376d 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -109,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, last->next = dreq->chain.first; engine->chain.last = dreq->chain.last; - if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + /* + * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on + * the last element of the current chain, or if the request + * being queued needs the IV regs to be set before lauching + * the request. + */ + if (!(last->flags & CESA_TDMA_BREAK_CHAIN) && + !(dreq->chain.first->flags & CESA_TDMA_SET_STATE)) last->next_dma = dreq->chain.first->cur_dma; } } -- cgit v1.2.3 From c8b1b3dd89ea7b3f77a73e59c4c4495e16338e15 Mon Sep 17 00:00:00 2001 From: Brendan McGrath Date: Sat, 10 Dec 2016 21:20:42 +1100 Subject: HID: asus: Fix keyboard support The previous submission which added Touchpad support broke the Keyboard support of this driver. This patch: 1. fixes the Keyboard support (by assigning drvdata->input); 2. renames NOTEBOOK_QUIRKS to KEYBOARD_QUIRKS; 3. adds the NO_INIT_REPORT quirk to the KEYBOARD_QUIRKS; and 4. 
sets the input->name to 'Asus Keyboard' for the keyboard Signed-off-by: Brendan McGrath Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index d40ed9fdf68d..70b12f89a193 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -64,7 +64,8 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad"); #define QUIRK_SKIP_INPUT_MAPPING BIT(2) #define QUIRK_IS_MULTITOUCH BIT(3) -#define NOTEBOOK_QUIRKS QUIRK_FIX_NOTEBOOK_REPORT +#define KEYBOARD_QUIRKS (QUIRK_FIX_NOTEBOOK_REPORT | \ + QUIRK_NO_INIT_REPORTS) #define TOUCHPAD_QUIRKS (QUIRK_NO_INIT_REPORTS | \ QUIRK_SKIP_INPUT_MAPPING | \ QUIRK_IS_MULTITOUCH) @@ -170,11 +171,11 @@ static int asus_raw_event(struct hid_device *hdev, static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi) { + struct input_dev *input = hi->input; struct asus_drvdata *drvdata = hid_get_drvdata(hdev); if (drvdata->quirks & QUIRK_IS_MULTITOUCH) { int ret; - struct input_dev *input = hi->input; input_set_abs_params(input, ABS_MT_POSITION_X, 0, MAX_X, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, MAX_Y, 0, 0); @@ -191,10 +192,10 @@ static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi) hid_err(hdev, "Asus input mt init slots failed: %d\n", ret); return ret; } - - drvdata->input = input; } + drvdata->input = input; + return 0; } @@ -286,7 +287,11 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) goto err_stop_hw; } - drvdata->input->name = "Asus TouchPad"; + if (drvdata->quirks & QUIRK_IS_MULTITOUCH) { + drvdata->input->name = "Asus TouchPad"; + } else { + drvdata->input->name = "Asus Keyboard"; + } if (drvdata->quirks & QUIRK_IS_MULTITOUCH) { ret = asus_start_multitouch(hdev); @@ -315,7 +320,7 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, static const struct 
hid_device_id asus_devices[] = { { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, - USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), NOTEBOOK_QUIRKS}, + USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), KEYBOARD_QUIRKS}, { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_TOUCHPAD), TOUCHPAD_QUIRKS }, { } -- cgit v1.2.3 From 2b6579d4a71afb19c6583470783371b992944f67 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Thu, 8 Dec 2016 19:09:50 -0800 Subject: HID: sony: Fix error handling bug when touchpad registration fails The error handling code in sony_input_configured in general uses goto based cleanup. Recently we migrated code from sony_probe to here, but we didn't update the existing touchpad registration code, which was already here to use the goto. Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 7687c0875395..f68c921af55e 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2548,7 +2548,7 @@ static int sony_input_configured(struct hid_device *hdev, hid_err(sc->hdev, "Unable to initialize multi-touch slots: %d\n", ret); - return ret; + goto err_stop; } sony_init_output_report(sc, dualshock4_send_output_report); -- cgit v1.2.3 From c70d5f70ccbbdf56bb86adb42127db90d0c90976 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Thu, 8 Dec 2016 19:09:51 -0800 Subject: HID: sony: Use DS4 MAC address as unique identifier on USB The DS4 MAC address is reported as a unique identifier when using Bluetooth. For USB there is no unique identifier reported yet, so use the MAC address. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index f68c921af55e..c6982a29a56d 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2390,6 +2390,12 @@ static int sony_check_add(struct sony_sc *sc) } memcpy(sc->mac_address, &buf[1], sizeof(sc->mac_address)); + + snprintf(sc->hdev->uniq, sizeof(sc->hdev->uniq), + "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + sc->mac_address[5], sc->mac_address[4], + sc->mac_address[3], sc->mac_address[2], + sc->mac_address[1], sc->mac_address[0]); } else if ((sc->quirks & SIXAXIS_CONTROLLER_USB) || (sc->quirks & NAVIGATION_CONTROLLER_USB)) { buf = kmalloc(SIXAXIS_REPORT_0xF2_SIZE, GFP_KERNEL); -- cgit v1.2.3 From 405182c2459fe2de4a3994ef39e866993e0e61d1 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Thu, 8 Dec 2016 19:09:52 -0800 Subject: HID: sony: Ignore DS4 dongle reports when no device is connected When the DS4 dongle is connected, it always generates HID reports even when no DS4 is paired to it. This patch adds logic to ignore HID reports from the dongle if there is no DS4 currently attached. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index c6982a29a56d..f405b07d0381 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -1099,8 +1099,11 @@ struct sony_sc { u8 led_delay_on[MAX_LEDS]; u8 led_delay_off[MAX_LEDS]; u8 led_count; + bool ds4_dongle_connected; }; +static void sony_set_leds(struct sony_sc *sc); + static inline void sony_schedule_work(struct sony_sc *sc) { if (!sc->defer_initialization) @@ -1430,6 +1433,31 @@ static int sony_raw_event(struct hid_device *hdev, struct hid_report *report, return -EILSEQ; } } + + /* + * In the case of a DS4 USB dongle, bit[2] of byte 31 indicates + * if a DS4 is actually connected (indicated by '0'). + * For non-dongle, this bit is always 0 (connected). + */ + if (sc->hdev->vendor == USB_VENDOR_ID_SONY && + sc->hdev->product == USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE) { + bool connected = (rd[31] & 0x04) ? false : true; + + if (!sc->ds4_dongle_connected && connected) { + hid_info(sc->hdev, "DualShock 4 USB dongle: controller connected\n"); + sony_set_leds(sc); + sc->ds4_dongle_connected = true; + } else if (sc->ds4_dongle_connected && !connected) { + hid_info(sc->hdev, "DualShock 4 USB dongle: controller disconnected\n"); + sc->ds4_dongle_connected = false; + /* Return 0, so hidraw can get the report. */ + return 0; + } else if (!sc->ds4_dongle_connected) { + /* Return 0, so hidraw can get the report. */ + return 0; + } + } + dualshock4_parse_report(sc, rd, size); } -- cgit v1.2.3 From 8f2b468aadc81ca0fc78e41696b648e30d91ba5c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 20 Dec 2016 07:27:59 +0100 Subject: s390/vtime: correct system time accounting There is a slight misaccounting of system time in vtime_account_user. This function is called once per HZ tick in interrupt context. 
The irq_enter function already accounted the system time up to the point of the irq_enter call. The system time from irq_enter until vtime_account_user/do_account_vtime is reached is irq time but it is accounted to the previous context. Just drop the hardirq offset from arch/s390/kernel/vtime.c. Reported-by: Frederic Weisbecker Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vtime.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 6b246aadf311..1b5c5ee9fc1b 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -94,7 +94,7 @@ static void update_mt_scaling(void) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) +static int do_account_vtime(struct task_struct *tsk) { u64 timer, clock, user, system, steal; u64 user_scaled, system_scaled; @@ -138,7 +138,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) } account_user_time(tsk, user); tsk->utimescaled += user_scaled; - account_system_time(tsk, hardirq_offset, system); + account_system_time(tsk, 0, system); tsk->stimescaled += system_scaled; steal = S390_lowcore.steal_timer; @@ -152,7 +152,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) void vtime_task_switch(struct task_struct *prev) { - do_account_vtime(prev, 0); + do_account_vtime(prev); prev->thread.user_timer = S390_lowcore.user_timer; prev->thread.system_timer = S390_lowcore.system_timer; S390_lowcore.user_timer = current->thread.user_timer; @@ -166,7 +166,7 @@ void vtime_task_switch(struct task_struct *prev) */ void vtime_account_user(struct task_struct *tsk) { - if (do_account_vtime(tsk, HARDIRQ_OFFSET)) + if (do_account_vtime(tsk)) virt_timer_expire(); } -- cgit v1.2.3 From cabab3f9f5ca077535080b3252e6168935b914af Mon Sep 17 00:00:00 2001 From: 
Heiko Carstens Date: Tue, 20 Dec 2016 12:58:10 +0100 Subject: s390/kbuild: enable modversions for symbols exported from asm s390 version of commit 334bb7738764 ("x86/kbuild: enable modversions for symbols exported from asm") so we get also rid of all these warnings: WARNING: EXPORT symbol "_mcount" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "memcpy" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "memmove" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "memset" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "save_fpu_regs" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "sie64a" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "sie_exit" [vmlinux] version generation failed, symbol will not be versioned. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/asm-prototypes.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 arch/s390/include/asm/asm-prototypes.h diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..2c3413b0ca52 --- /dev/null +++ b/arch/s390/include/asm/asm-prototypes.h @@ -0,0 +1,8 @@ +#ifndef _ASM_S390_PROTOTYPES_H + +#include +#include +#include +#include + +#endif /* _ASM_S390_PROTOTYPES_H */ -- cgit v1.2.3 From b508fc354f6d168ec18673d99d3bce9d6c1d9475 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Dec 2016 11:20:45 +0100 Subject: nvme: update maintainers information Switch the new, shared nvme git repository, which is co-maintained by everyone involved with NVMe. Also add the nvme_ioctl.h UAPI header to the files list. 
Signed-off-by: Christoph Hellwig --- MAINTAINERS | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f6eb97b35e0f..9b279928461d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8852,17 +8852,22 @@ F: drivers/video/fbdev/nvidia/ NVM EXPRESS DRIVER M: Keith Busch M: Jens Axboe +M: Christoph Hellwig +M: Sagi Grimberg L: linux-nvme@lists.infradead.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git -W: https://kernel.googlesource.com/pub/scm/linux/kernel/git/axboe/linux-block/ +T: git://git.infradead.org/nvme.git +W: http://git.infradead.org/nvme.git S: Supported F: drivers/nvme/host/ F: include/linux/nvme.h +F: include/uapi/linux/nvme_ioctl.h NVM EXPRESS TARGET DRIVER M: Christoph Hellwig M: Sagi Grimberg L: linux-nvme@lists.infradead.org +T: git://git.infradead.org/nvme.git +W: http://git.infradead.org/nvme.git S: Supported F: drivers/nvme/target/ -- cgit v1.2.3 From e6282aef7b89a11d26e731060c4409b7aac278bf Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Dec 2016 11:37:50 -0500 Subject: nvme: simplify stripe quirk Some OEMs believe they own the Identify Controller vendor specific region and will repurpose it with their own values. While not common, we can't rely on the PCI VID:DID to tell us how to decode the field we reserved for this as the stripe size so we need to do something else for the list of devices using this quirk. The field was supposed to allow flexibility on the device's back-end striping, but it turned out that never materialized; the chunk is always the same as MDTS in the products subscribing to this quirk, so this patch removes the stripe_size field and sets the chunk to the max hw transfer size for the devices using this quirk. 
Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 ++--------------- drivers/nvme/host/nvme.h | 1 - 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b40cfb076f02..2fc86dc7a8df 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1193,8 +1193,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if (ctrl->stripe_size) - blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9); + if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) + blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; @@ -1250,19 +1250,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->max_hw_sectors = min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) { - unsigned int max_hw_sectors; - - ctrl->stripe_size = 1 << (id->vs[3] + page_shift); - max_hw_sectors = ctrl->stripe_size >> (page_shift - 9); - if (ctrl->max_hw_sectors) { - ctrl->max_hw_sectors = min(max_hw_sectors, - ctrl->max_hw_sectors); - } else { - ctrl->max_hw_sectors = max_hw_sectors; - } - } - nvme_set_queue_limits(ctrl, ctrl->admin_q); ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bd5321441d12..6377e14586dc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -135,7 +135,6 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; - u32 stripe_size; u16 oncs; u16 vid; atomic_t abort_limit; -- cgit v1.2.3 From 9fa196e7fc7a0f12329d5346164abb27f026991c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 19 Dec 2016 16:18:24 +0200 Subject: nvme/pci: Fix whitespace problem Convert to tabs and remove unneeded whitespaces. 
Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2fd7dc2e8fc4..7d6c87028568 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -50,7 +50,7 @@ #define NVME_AQ_DEPTH 256 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) - + /* * We handle AEN commands ourselves and don't even let the * block layer know about them. @@ -1909,10 +1909,10 @@ static int nvme_dev_map(struct nvme_dev *dev) if (!dev->bar) goto release; - return 0; + return 0; release: - pci_release_mem_regions(pdev); - return -ENODEV; + pci_release_mem_regions(pdev); + return -ENODEV; } static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) -- cgit v1.2.3 From ff13b39ecf726715a96fcd3c23e50eb792ef6516 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 9 Dec 2016 12:08:58 -0500 Subject: nvme/pci: Delete misleading queue-wrap comment It is not theoretically possible for this driver to wrap twice while processing completions. The driver allocates only 'queue_depth - 1' tags, so there can never be more than that to reap when processing a completion queue. Removing this misleading comment makes it a little less likely people with broken controllers will blame the driver for their spurious interrupts. 
Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7d6c87028568..151ce59f4ffb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -712,15 +712,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); nvme_req(req)->result = cqe.result; blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); - } - /* If the controller ignores the cq head doorbell and continuously - * writes to the queue, it is theoretically possible to wrap around - * the queue twice and mistakenly return IRQ_NONE. Linux only - * requires that 0.1% of your interrupts are handled, so this isn't - * a big problem. - */ if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; -- cgit v1.2.3 From 2c473a9d02fbe881506d5d43bc7edb776f2f46f5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 6 Dec 2016 10:14:35 -0800 Subject: nvme/scsi: Remove START STOP emulation Now that the broken power state control is gone, it appears to serve no purpose. Just delete it. NVME devices don't have a concept of started vs stopped anyway. 
Signed-off-by: Andy Lutomirski Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/scsi.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index b71e95044b43..a5c09e703bd8 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -2160,30 +2160,6 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, return nvme_trans_status_code(hdr, nvme_sc); } -static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) -{ - u8 immed, no_flush; - - immed = cmd[1] & 0x01; - no_flush = cmd[4] & 0x04; - - if (immed != 0) { - return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - } else { - if (no_flush == 0) { - /* Issue NVME FLUSH command prior to START STOP UNIT */ - int res = nvme_trans_synchronize_cache(ns, hdr); - if (res) - return res; - } - - return 0; - } -} - static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { @@ -2439,9 +2415,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) case SECURITY_PROTOCOL_OUT: retcode = nvme_trans_security_protocol(ns, hdr, cmd); break; - case START_STOP: - retcode = nvme_trans_start_stop(ns, hdr, cmd); - break; case SYNCHRONIZE_CACHE: retcode = nvme_trans_synchronize_cache(ns, hdr); break; -- cgit v1.2.3 From c703489885218900579279cec4b4ab8e7fce383b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 20 Dec 2016 11:06:08 -0800 Subject: nvme/fc: correct some printk information Dan Carpenter's tool caught a pointer reference - should have been just ptr, not &ptr. Don't bother. Remove the pointer value in the printf. It's irrelevant. 
Signed-off-by: James Smart Reported-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 771e2e761872..827c2b57e5bb 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2401,8 +2401,8 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, WARN_ON_ONCE(!changed); dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\" (%p)\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn, &ctrl); + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); kref_get(&ctrl->ctrl.kref); -- cgit v1.2.3 From 17a1ec08ce7074f05795e5c32a3e5bc9a797bbf8 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 15 Dec 2016 14:20:48 +0100 Subject: nvme/fc: simplify error handling of nvme_fc_create_hw_io_queues Simplify the error handling of nvme_fc_create_hw_io_queues(), this saves us one variable and one level of indentation. 
Signed-off-by: Johannes Thumshirn Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 827c2b57e5bb..aa0bc60810a7 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1491,19 +1491,20 @@ static int nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) { struct nvme_fc_queue *queue = &ctrl->queues[1]; - int i, j, ret; + int i, ret; for (i = 1; i < ctrl->queue_count; i++, queue++) { ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); - if (ret) { - for (j = i-1; j >= 0; j--) - __nvme_fc_delete_hw_queue(ctrl, - &ctrl->queues[j], j); - return ret; - } + if (ret) + goto delete_queues; } return 0; + +delete_queues: + for (; i >= 0; i--) + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); + return ret; } static int -- cgit v1.2.3 From 6c73f949300f17851f53fa80c9d1611ccd6909d3 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Fri, 9 Dec 2016 12:59:46 -0700 Subject: nvmet: fix KATO offset in Set Features The Set Features implementation for Keep Alive Timer was using the wrong structure when retrieving the KATO value; it was treating the Set Features command as a Property Set command. The NVMe spec defines the Keep Alive Timer feature as having one input in CDW11 (4 bytes at offset 44 in the command) whereas the code was reading 8 bytes at offset 48. Since the Linux NVMe over Fabrics host never sets this feature, this code has presumably never been tested. 
Signed-off-by: Daniel Verkamp Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index ec1ad2aa0a4c..95ae52390478 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -382,7 +382,6 @@ static void nvmet_execute_set_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); - u64 val; u32 val32; u16 status = 0; @@ -392,8 +391,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); break; case NVME_FEAT_KATO: - val = le64_to_cpu(req->cmd->prop_set.value); - val32 = val & 0xffff; + val32 = le32_to_cpu(req->cmd->common.cdw10[1]); req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); nvmet_set_result(req, req->sq->ctrl->kato); break; -- cgit v1.2.3 From 7c3a23b85cac5f3caa531f369c1e3a5f1a8b555f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Dec 2016 14:59:47 +0000 Subject: nvmet/fcloop: remove some logically dead code performing redundant ret checks The check to see if ret is non-zero and return this rather than count is redundant in two occassions. It is redundant because prior to this check, the return code ret is already checked for a non-zero error return value and we return from the function at that point. Signed-off-by: Colin Ian King Reviewed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index bcb8ebeb01c5..4e8e6a22bce1 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -845,7 +845,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr, rport->lport = nport->lport; nport->rport = rport; - return ret ? 
ret : count; + return count; } @@ -952,7 +952,7 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr, tport->lport = nport->lport; nport->tport = tport; - return ret ? ret : count; + return count; } -- cgit v1.2.3 From 64d656a162d7ba49d6d1863e41407b0f95e19258 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Dec 2016 19:20:45 +0100 Subject: block: add back plugging in __blkdev_direct_IO This allows sending larger than 1 MB requests to devices that support large I/O sizes. Signed-off-by: Christoph Hellwig Reported-by: Laurence Oberman Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 7c4507224ed6..206a92aab52e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -328,6 +328,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); struct block_device *bdev = I_BDEV(inode); + struct blk_plug plug; struct blkdev_dio *dio; struct bio *bio; bool is_read = (iov_iter_rw(iter) == READ); @@ -353,6 +354,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) dio->multi_bio = false; dio->should_dirty = is_read && (iter->type == ITER_IOVEC); + blk_start_plug(&plug); for (;;) { bio->bi_bdev = bdev; bio->bi_iter.bi_sector = pos >> 9; @@ -394,6 +396,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) submit_bio(bio); bio = bio_alloc(GFP_KERNEL, nr_pages); } + blk_finish_plug(&plug); if (!dio->is_sync) return -EIOCBQUEUED; -- cgit v1.2.3 From 72c5296f9d64d8f5f27c2133e5f108a45a353d71 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Tue, 20 Dec 2016 14:38:14 -0700 Subject: genhd: remove dead and duplicated scsi code blk_scsi_cmd_filter use was deprecated by 4beab5c6 and the SCSI macros are duplicated in blkdev.h, both likely reintroduced by a bad merge from 540eed56. 
Signed-off-by: Jon Derrick Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e0341af6950e..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -146,15 +146,6 @@ enum { DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ }; -#define BLK_SCSI_MAX_CMDS (256) -#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) - -struct blk_scsi_cmd_filter { - unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; - unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; - struct kobject kobj; -}; - struct disk_part_tbl { struct rcu_head rcu_head; int len; -- cgit v1.2.3 From e568df6b84ff05a22467503afc11bee7a6ba0700 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 16:42:53 +0200 Subject: ext2: Return BH_New buffers for zeroed blocks So far we did not return BH_New buffers from ext2_get_blocks() when we allocated and zeroed-out a block for DAX inode to avoid racy zeroing in DAX code. This zeroing is gone these days so we can remove the workaround. 
Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/ext2/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 0093ea2512a8..f073bfca694b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode, mutex_unlock(&ei->truncate_mutex); goto cleanup; } - } else { - *new = true; } + *new = true; ext2_splice_branch(inode, iblock, partial, indirect_blks, count); mutex_unlock(&ei->truncate_mutex); -- cgit v1.2.3 From c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 17:22:44 +0200 Subject: mm: Invalidate DAX radix tree entries only if appropriate Currently invalidate_inode_pages2_range() and invalidate_mapping_pages() just delete all exceptional radix tree entries they find. For DAX this is not desirable as we track cache dirtiness in these entries and when they are evicted, we may not flush caches although it is necessary. This can for example manifest when we write to the same block both via mmap and via write(2) (to different offsets) and fsync(2) then does not properly flush CPU caches when modification via write(2) was the last one. Create appropriate DAX functions to handle invalidation of DAX entries for invalidate_inode_pages2_range() and invalidate_mapping_pages() and wire them up into the corresponding mm functions. 
Acked-by: Johannes Weiner Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 71 +++++++++++++++++++++++++++++++++++++++++++------- include/linux/dax.h | 3 +++ mm/truncate.c | 75 +++++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 125 insertions(+), 24 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index a8732fbed381..bcfedd184860 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); } +static int __dax_invalidate_mapping_entry(struct address_space *mapping, + pgoff_t index, bool trunc) +{ + int ret = 0; + void *entry; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = get_unlocked_mapping_entry(mapping, index, NULL); + if (!entry || !radix_tree_exceptional_entry(entry)) + goto out; + if (!trunc && + (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) + goto out; + radix_tree_delete(page_tree, index); + mapping->nrexceptional--; + ret = 1; +out: + put_unlocked_mapping_entry(mapping, index, entry); + spin_unlock_irq(&mapping->tree_lock); + return ret; +} /* * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree * entry to get unlocked before deleting it. */ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) { - void *entry; + int ret = __dax_invalidate_mapping_entry(mapping, index, true); - spin_lock_irq(&mapping->tree_lock); - entry = get_unlocked_mapping_entry(mapping, index, NULL); /* * This gets called from truncate / punch_hole path. 
As such, the caller * must hold locks protecting against concurrent modifications of the @@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) * caller has seen exceptional entry for this index, we better find it * at that index as well... */ - if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) { - spin_unlock_irq(&mapping->tree_lock); - return 0; - } - radix_tree_delete(&mapping->page_tree, index); + WARN_ON_ONCE(!ret); + return ret; +} + +/* + * Invalidate exceptional DAX entry if easily possible. This handles DAX + * entries for invalidate_inode_pages() so we evict the entry only if we can + * do so without blocking. + */ +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index) +{ + int ret = 0; + void *entry, **slot; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = __radix_tree_lookup(page_tree, index, NULL, &slot); + if (!entry || !radix_tree_exceptional_entry(entry) || + slot_locked(mapping, slot)) + goto out; + if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) + goto out; + radix_tree_delete(page_tree, index); mapping->nrexceptional--; + ret = 1; +out: spin_unlock_irq(&mapping->tree_lock); - dax_wake_mapping_entry_waiter(mapping, index, entry, true); + if (ret) + dax_wake_mapping_entry_waiter(mapping, index, entry, true); + return ret; +} - return 1; +/* + * Invalidate exceptional DAX entry if it is clean. 
+ */ +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index) +{ + return __dax_invalidate_mapping_entry(mapping, index, false); } /* diff --git a/include/linux/dax.h b/include/linux/dax.h index f97bcfe79472..24ad71173995 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all); diff --git a/mm/truncate.c b/mm/truncate.c index fd97f1dbce29..dd7b24e083c5 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -24,20 +24,12 @@ #include #include "internal.h" -static void clear_exceptional_entry(struct address_space *mapping, - pgoff_t index, void *entry) +static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, + void *entry) { struct radix_tree_node *node; void **slot; - /* Handled by shmem itself */ - if (shmem_mapping(mapping)) - return; - - if (dax_mapping(mapping)) { - dax_delete_mapping_entry(mapping, index); - return; - } spin_lock_irq(&mapping->tree_lock); /* * Regular page slots are stabilized by the page lock even @@ -55,6 +47,56 @@ unlock: spin_unlock_irq(&mapping->tree_lock); } +/* + * Unconditionally remove exceptional entry. Usually called from truncate path. 
+ */ +static void truncate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return; + + if (dax_mapping(mapping)) { + dax_delete_mapping_entry(mapping, index); + return; + } + clear_shadow_entry(mapping, index, entry); +} + +/* + * Invalidate exceptional entry if easily possible. This handles exceptional + * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and + * clean entries. + */ +static int invalidate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + +/* + * Invalidate exceptional entry if clean. This handles exceptional entries for + * invalidate_inode_pages2() so for DAX it evicts only clean entries. + */ +static int invalidate_exceptional_entry2(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry_sync(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + /** * do_invalidatepage - invalidate part or all of a page * @page: the page which is affected @@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping, } if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, 
break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + invalidate_exceptional_entry(mapping, index, + page); continue; } @@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + if (!invalidate_exceptional_entry2(mapping, + index, page)) + ret = -EBUSY; continue; } -- cgit v1.2.3 From e3fce68cdbed297d927e993b3ea7b8b1cee545da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 17:10:28 +0200 Subject: dax: Avoid page invalidation races and unnecessary radix tree traversals Currently dax_iomap_rw() takes care of invalidating page tables and evicting hole pages from the radix tree when write(2) to the file happens. This invalidation is only necessary when there is some block allocation resulting from write(2). Furthermore in current place the invalidation is racy wrt page fault instantiating a hole page just after we have invalidated it. So perform the page invalidation inside dax_iomap_actor() where we can do it only when really necessary and after blocks have been allocated so nobody will be instantiating new hole pages anymore. Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index bcfedd184860..08e15db28b79 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -985,6 +985,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) return -EIO; + /* + * Write can allocate block for an area which has a hole page mapped + * into page tables. We have to tear down these mappings so that data + * written by write(2) is visible in mmap. 
+ */ + if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { + invalidate_inode_pages2_range(inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); + } + while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); struct blk_dax_ctl dax = { 0 }; @@ -1043,23 +1054,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) flags |= IOMAP_WRITE; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. - * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. We'll eventually need to shift this down even further so that - * we can check if we allocated blocks over a hole first. - */ - if (mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, - (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); -- cgit v1.2.3 From f449b936f1aff7696b24a338f493d5cee8d48d55 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Oct 2016 14:48:38 +0200 Subject: dax: Finish fault completely when loading holes The only case when we do not finish the page fault completely is when we are loading hole pages into a radix tree. Avoid this special case and finish the fault in that case as well inside the DAX fault handler. It will allow us for easier iomap handling. 
Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 08e15db28b79..bfec6f2ef613 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -539,15 +539,16 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, * otherwise it will simply fall out of the page cache under memory * pressure without ever having been dirtied. */ -static int dax_load_hole(struct address_space *mapping, void *entry, +static int dax_load_hole(struct address_space *mapping, void **entry, struct vm_fault *vmf) { struct page *page; + int ret; /* Hole page already exists? Return it... */ - if (!radix_tree_exceptional_entry(entry)) { - vmf->page = entry; - return VM_FAULT_LOCKED; + if (!radix_tree_exceptional_entry(*entry)) { + page = *entry; + goto out; } /* This will replace locked radix tree entry with a hole page */ @@ -555,8 +556,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry, vmf->gfp_mask | __GFP_ZERO); if (!page) return VM_FAULT_OOM; + out: vmf->page = page; - return VM_FAULT_LOCKED; + ret = finish_fault(vmf); + vmf->page = NULL; + *entry = page; + if (!ret) { + /* Grab reference for PTE that is now referencing the page */ + get_page(page); + return VM_FAULT_NOPAGE; + } + return ret; } static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size, @@ -1163,8 +1173,8 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (!(vmf->flags & FAULT_FLAG_WRITE)) { - vmf_ret = dax_load_hole(mapping, entry, vmf); - break; + vmf_ret = dax_load_hole(mapping, &entry, vmf); + goto finish_iomap; } /*FALLTHRU*/ default: @@ -1185,8 +1195,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } } unlock_entry: - if (vmf_ret != VM_FAULT_LOCKED || error) - put_locked_mapping_entry(mapping, vmf->pgoff, entry); + 
put_locked_mapping_entry(mapping, vmf->pgoff, entry); out: if (error == -ENOMEM) return VM_FAULT_OOM | major; -- cgit v1.2.3 From 9f141d6ef6258a3a37a045842d9ba7e68f368956 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Oct 2016 14:34:31 +0200 Subject: dax: Call ->iomap_begin without entry lock during dax fault Currently ->iomap_begin() handler is called with entry lock held. If the filesystem held any locks between ->iomap_begin() and ->iomap_end() (such as ext4 which will want to hold transaction open), this would cause lock inversion with the iomap_apply() from standard IO path which first calls ->iomap_begin() and only then calls ->actor() callback which grabs entry locks for DAX (if it faults when copying from/to user provided buffers). Fix the problem by nesting grabbing of entry lock inside ->iomap_begin() - ->iomap_end() pair. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 121 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 66 insertions(+), 55 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index bfec6f2ef613..5c74f60d0a50 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1078,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, } EXPORT_SYMBOL_GPL(dax_iomap_rw); +static int dax_fault_return(int error) +{ + if (error == 0) + return VM_FAULT_NOPAGE; + if (error == -ENOMEM) + return VM_FAULT_OOM; + return VM_FAULT_SIGBUS; +} + /** * dax_iomap_fault - handle a page fault on a DAX file * @vma: The virtual memory area where the fault occurred @@ -1110,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, if (pos >= i_size_read(inode)) return VM_FAULT_SIGBUS; - entry = grab_mapping_entry(mapping, vmf->pgoff, 0); - if (IS_ERR(entry)) { - error = PTR_ERR(entry); - goto out; - } - if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) flags |= IOMAP_WRITE; @@ -1126,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, 
*/ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); if (error) - goto unlock_entry; + return dax_fault_return(error); if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { - error = -EIO; /* fs corruption? */ + vmf_ret = dax_fault_return(-EIO); /* fs corruption? */ + goto finish_iomap; + } + + entry = grab_mapping_entry(mapping, vmf->pgoff, 0); + if (IS_ERR(entry)) { + vmf_ret = dax_fault_return(PTR_ERR(entry)); goto finish_iomap; } @@ -1151,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } if (error) - goto finish_iomap; + goto error_unlock_entry; __SetPageUptodate(vmf->cow_page); vmf_ret = finish_fault(vmf); if (!vmf_ret) vmf_ret = VM_FAULT_DONE_COW; - goto finish_iomap; + goto unlock_entry; } switch (iomap.type) { @@ -1169,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE, &entry, vma, vmf); + /* -EBUSY is fine, somebody else faulted on the same PTE */ + if (error == -EBUSY) + error = 0; break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (!(vmf->flags & FAULT_FLAG_WRITE)) { vmf_ret = dax_load_hole(mapping, &entry, vmf); - goto finish_iomap; + goto unlock_entry; } /*FALLTHRU*/ default: @@ -1183,30 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, break; } - finish_iomap: - if (ops->iomap_end) { - if (error || (vmf_ret & VM_FAULT_ERROR)) { - /* keep previous error */ - ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags, - &iomap); - } else { - error = ops->iomap_end(inode, pos, PAGE_SIZE, - PAGE_SIZE, flags, &iomap); - } - } + error_unlock_entry: + vmf_ret = dax_fault_return(error) | major; unlock_entry: put_locked_mapping_entry(mapping, vmf->pgoff, entry); - out: - if (error == -ENOMEM) - return VM_FAULT_OOM | major; - /* -EBUSY is fine, somebody else faulted on the same PTE */ - if (error < 0 && error != -EBUSY) - return VM_FAULT_SIGBUS | major; - if (vmf_ret) { - 
WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */ - return vmf_ret; + finish_iomap: + if (ops->iomap_end) { + int copied = PAGE_SIZE; + + if (vmf_ret & VM_FAULT_ERROR) + copied = 0; + /* + * The fault is done by now and there's no way back (other + * thread may be already happily using PTE we have installed). + * Just ignore error from ->iomap_end since we cannot do much + * with it. + */ + ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap); } - return VM_FAULT_NOPAGE | major; + return vmf_ret; } EXPORT_SYMBOL_GPL(dax_iomap_fault); @@ -1330,16 +1337,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((pgoff | PG_PMD_COLOUR) > max_pgoff) goto fallback; - /* - * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX - * PMD or a HZP entry. If it can't (because a 4k page is already in - * the tree, for instance), it will return -EEXIST and we just fall - * back to 4k entries. - */ - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); - if (IS_ERR(entry)) - goto fallback; - /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way @@ -1348,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, pos = (loff_t)pgoff << PAGE_SHIFT; error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); if (error) - goto unlock_entry; + goto fallback; + if (iomap.offset + iomap.length < pos + PMD_SIZE) goto finish_iomap; + /* + * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX + * PMD or a HZP entry. If it can't (because a 4k page is already in + * the tree, for instance), it will return -EEXIST and we just fall + * back to 4k entries. 
+ */ + entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + if (IS_ERR(entry)) + goto finish_iomap; + vmf.pgoff = pgoff; vmf.flags = flags; vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO; @@ -1364,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) - goto finish_iomap; + goto unlock_entry; result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap, &entry); break; @@ -1373,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, break; } + unlock_entry: + put_locked_mapping_entry(mapping, pgoff, entry); finish_iomap: if (ops->iomap_end) { - if (result == VM_FAULT_FALLBACK) { - ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags, - &iomap); - } else { - error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE, - iomap_flags, &iomap); - if (error) - result = VM_FAULT_FALLBACK; - } + int copied = PMD_SIZE; + + if (result == VM_FAULT_FALLBACK) + copied = 0; + /* + * The fault is done by now and there's no way back (other + * thread may be already happily using PMD we have installed). + * Just ignore error from ->iomap_end since we cannot do much + * with it. + */ + ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags, + &iomap); } - unlock_entry: - put_locked_mapping_entry(mapping, pgoff, entry); fallback: if (result == VM_FAULT_FALLBACK) { split_huge_pmd(vma, pmd, address); -- cgit v1.2.3 From 1db175428ee374489448361213e9c3b749d14900 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Oct 2016 11:33:49 +0200 Subject: ext4: Simplify DAX fault path Now that dax_iomap_fault() calls ->iomap_begin() without entry lock, we can use transaction starting in ext4_iomap_begin() and thus simplify ext4_dax_fault(). It also provides us proper retries in case of ENOSPC. 
Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/ext4/file.c | 48 ++++++++++-------------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b5f184493c57..d663d3d7c81c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -258,7 +258,6 @@ out: static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int result; - handle_t *handle = NULL; struct inode *inode = file_inode(vma->vm_file); struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); - down_read(&EXT4_I(inode)->i_mmap_sem); - handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, - EXT4_DATA_TRANS_BLOCKS(sb)); - } else - down_read(&EXT4_I(inode)->i_mmap_sem); - - if (IS_ERR(handle)) - result = VM_FAULT_SIGBUS; - else - result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); - - if (write) { - if (!IS_ERR(handle)) - ext4_journal_stop(handle); - up_read(&EXT4_I(inode)->i_mmap_sem); + } + down_read(&EXT4_I(inode)->i_mmap_sem); + result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); + up_read(&EXT4_I(inode)->i_mmap_sem); + if (write) sb_end_pagefault(sb); - } else - up_read(&EXT4_I(inode)->i_mmap_sem); return result; } @@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { int result; - handle_t *handle = NULL; struct inode *inode = file_inode(vma->vm_file); struct super_block *sb = inode->i_sb; bool write = flags & FAULT_FLAG_WRITE; @@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); - down_read(&EXT4_I(inode)->i_mmap_sem); - handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, - ext4_chunk_trans_blocks(inode, - PMD_SIZE / 
PAGE_SIZE)); - } else - down_read(&EXT4_I(inode)->i_mmap_sem); - - if (IS_ERR(handle)) - result = VM_FAULT_SIGBUS; - else { - result = dax_iomap_pmd_fault(vma, addr, pmd, flags, - &ext4_iomap_ops); } - - if (write) { - if (!IS_ERR(handle)) - ext4_journal_stop(handle); - up_read(&EXT4_I(inode)->i_mmap_sem); + down_read(&EXT4_I(inode)->i_mmap_sem); + result = dax_iomap_pmd_fault(vma, addr, pmd, flags, + &ext4_iomap_ops); + up_read(&EXT4_I(inode)->i_mmap_sem); + if (write) sb_end_pagefault(sb); - } else - up_read(&EXT4_I(inode)->i_mmap_sem); return result; } -- cgit v1.2.3 From 02608e02fbec04fccf2eb0cc8d8082f65c0a4286 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 21 Dec 2016 12:32:54 -0800 Subject: crypto: testmgr - Use heap buffer for acomp test input Christopher Covington reported a crash on aarch64 on recent Fedora kernels: kernel BUG at ./include/linux/scatterlist.h:140! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 752 Comm: cryptomgr_test Not tainted 4.9.0-11815-ge93b1cc #162 Hardware name: linux,dummy-virt (DT) task: ffff80007c650080 task.stack: ffff800008910000 PC is at sg_init_one+0xa0/0xb8 LR is at sg_init_one+0x24/0xb8 ... [] sg_init_one+0xa0/0xb8 [] test_acomp+0x10c/0x438 [] alg_test_comp+0xb0/0x118 [] alg_test+0x17c/0x2f0 [] cryptomgr_test+0x44/0x50 [] kthread+0xf8/0x128 [] ret_from_fork+0x10/0x50 The test vectors used for input are part of the kernel image. These inputs are passed as a buffer to sg_init_one which eventually blows up with BUG_ON(!virt_addr_valid(buf)). On arm64, virt_addr_valid returns false for the kernel image since virt_to_page will not return the correct page. Fix this by copying the input vectors to heap buffer before setting up the scatterlist. 
Reported-by: Christopher Covington Fixes: d7db7a882deb ("crypto: acomp - update testmgr with support for acomp") Signed-off-by: Laura Abbott Signed-off-by: Herbert Xu --- crypto/testmgr.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f616ad74cce7..44e888b0b041 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1461,16 +1461,25 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, for (i = 0; i < ctcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = ctemplate[i].inlen; + void *input_vec; + input_vec = kmalloc(ilen, GFP_KERNEL); + if (!input_vec) { + ret = -ENOMEM; + goto out; + } + + memcpy(input_vec, ctemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); - sg_init_one(&src, ctemplate[i].input, ilen); + sg_init_one(&src, input_vec, ilen); sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { pr_err("alg: acomp: request alloc failed for %s\n", algo); + kfree(input_vec); ret = -ENOMEM; goto out; } @@ -1483,6 +1492,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, if (ret) { pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1491,6 +1501,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n", i + 1, algo, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1500,26 +1511,37 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, i + 1, algo); hexdump(output, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } + kfree(input_vec); acomp_request_free(req); } for (i = 0; i < dtcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = dtemplate[i].inlen; + 
void *input_vec; + + input_vec = kmalloc(ilen, GFP_KERNEL); + if (!input_vec) { + ret = -ENOMEM; + goto out; + } + memcpy(input_vec, dtemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); - sg_init_one(&src, dtemplate[i].input, ilen); + sg_init_one(&src, input_vec, ilen); sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { pr_err("alg: acomp: request alloc failed for %s\n", algo); + kfree(input_vec); ret = -ENOMEM; goto out; } @@ -1532,6 +1554,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, if (ret) { pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1540,6 +1563,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n", i + 1, algo, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } @@ -1549,10 +1573,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, i + 1, algo); hexdump(output, req->dlen); ret = -EINVAL; + kfree(input_vec); acomp_request_free(req); goto out; } + kfree(input_vec); acomp_request_free(req); } -- cgit v1.2.3 From 66115335fbb411365c23349b2fbe7e041eabbaf2 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Fri, 23 Dec 2016 00:53:10 +0000 Subject: docs: Fix build failure The 80211.tmpl DocBook file was removed in commit 819bf593767c ("docs-rst: sphinxify 802.11 documentation"), but the 80211.xml target was re-added to the Makefile by commit 7ddedebb03b7 ("ALSA: doc: ReSTize writing-an-alsa-driver document"), leading to a failure when building the documentation: *** No rule to make target 'Documentation/DocBook/80211.xml', needed by 'Documentation/DocBook/80211.aux.xml'. 
cc: stable@vger.kernel.org Signed-off-by: John Brooks Mea-culpa-by: Jonathan Corbet Signed-off-by: Jonathan Corbet --- Documentation/DocBook/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index c75e5d6b8fa8..a6eb7dcd4dd5 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml \ kernel-api.xml filesystems.xml lsm.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - 80211.xml sh.xml regulator.xml w1.xml \ + sh.xml regulator.xml w1.xml \ writing_musb_glue_layer.xml iio.xml ifeq ($(DOCBOOKS),) -- cgit v1.2.3 From 36f671be1db1b17d3d4ab0c8b47f81ccb1efcb75 Mon Sep 17 00:00:00 2001 From: Cihangir Akturk Date: Sat, 17 Dec 2016 19:42:17 +0200 Subject: Documentation/unaligned-memory-access.txt: fix incorrect comparison operator In the actual implementation ether_addr_equal function tests for equality to 0 when returning. It seems in commit 0d74c4 it is somehow overlooked to change this operator to reflect the actual function. 
Signed-off-by: Cihangir Akturk Signed-off-by: Jonathan Corbet --- Documentation/unaligned-memory-access.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt index a445da098bc6..3f76c0c37920 100644 --- a/Documentation/unaligned-memory-access.txt +++ b/Documentation/unaligned-memory-access.txt @@ -151,7 +151,7 @@ bool ether_addr_equal(const u8 *addr1, const u8 *addr2) #else const u16 *a = (const u16 *)addr1; const u16 *b = (const u16 *)addr2; - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; #endif } -- cgit v1.2.3 From fe4f6c801c03bc13113d0dc32f02d4ea8ed89ffd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 27 Dec 2016 19:46:27 -0500 Subject: fscrypt: fix the test_dummy_encryption mount option Commit f1c131b45410a: "crypto: xts - Convert to skcipher" now fails the setkey operation if the AES key is the same as the tweak key. Previously this check was only done if FIPS mode is enabled. Now this check is also done if weak key checking was requested. This is reasonable, but since we were using the dummy key which was a constant series of 0x42 bytes, it now caused dummy encryption test mode to fail. Fix this by using 0x42... and 0x24... for the two keys, so they are different. 
Fixes: f1c131b45410a202eb45cc55980a7a9e4e4b4f40 Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o --- fs/crypto/keyinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 6eeea1dcba41..95cd4c3b06c3 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -248,7 +248,8 @@ retry: goto out; if (fscrypt_dummy_context_enabled(inode)) { - memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); + memset(raw_key, 0x42, keysize/2); + memset(raw_key+keysize/2, 0x24, keysize - (keysize/2)); goto got_key; } -- cgit v1.2.3 From 5701659004d68085182d2fd4199c79172165fa65 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 27 Dec 2016 18:23:06 -0800 Subject: net: stmmac: Fix race between stmmac_drv_probe and stmmac_open There is currently a small window during which the network device registered by stmmac can be made visible, yet all resources, including the clock and MDIO bus, have not had a chance to be set up; this can lead to the following error: [ 473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): stmmac_dvr_probe: warning: cannot get CSR clock [ 473.919382] stmmaceth 0000:01:00.0: no reset control found [ 473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42 [ 473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported [ 473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported [ 473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported [ 473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): Enable RX Mitigation via HW Watchdog Timer [ 473.921395] libphy: PHY stmmac-1:00 not found [ 473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY [ 473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to PHY (error: -19) [ 473.959710] libphy: stmmac: probed [ 473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL (stmmac-1:00) active [ 473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 
01410cc2 at 1 IRQ POLL (stmmac-1:01) [ 473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL (stmmac-1:02) [ 473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL (stmmac-1:03) Fix this by making sure that register_netdev() is the last thing being done, which guarantees that the clock and the MDIO bus are available. Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove") Reported-by: Kweh, Hock Leong Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bb40382e205d..5910ea51f8f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3339,13 +3339,6 @@ int stmmac_dvr_probe(struct device *device, spin_lock_init(&priv->lock); - ret = register_netdev(ndev); - if (ret) { - netdev_err(priv->dev, "%s: ERROR %i registering the device\n", - __func__, ret); - goto error_netdev_register; - } - /* If a specific clk_csr value is passed from the platform * this means that the CSR Clock Range selection cannot be * changed at run-time and it is fixed. 
Viceversa the driver'll try to @@ -3372,11 +3365,14 @@ int stmmac_dvr_probe(struct device *device, } } - return 0; + ret = register_netdev(ndev); + if (ret) + netdev_err(priv->dev, "%s: ERROR %i registering the device\n", + __func__, ret); + + return ret; error_mdio_register: - unregister_netdev(ndev); -error_netdev_register: netif_napi_del(&priv->napi); error_hw_init: clk_disable_unprepare(priv->pclk); -- cgit v1.2.3 From 0df0f207aab4f42e5c96a807adf9a6845b69e984 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 28 Dec 2016 14:54:47 +0200 Subject: net/sched: cls_flower: Fix missing addr_type in classify Since we now use a non zero mask on addr_type, we are matching on its value (IPV4/IPV6). So before this fix, matching on enc_src_ip/enc_dst_ip failed in SW/classify path since its value was zero. This patch sets the proper value of addr_type for encapsulated packets. Fixes: 970bfcd09791 ('net/sched: cls_flower: Use mask for addr_type') Signed-off-by: Paul Blakey Reviewed-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_flower.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 333f8e268431..970db7a41684 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -153,10 +153,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, switch (ip_tunnel_info_af(info)) { case AF_INET: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV4_ADDRS; skb_key.enc_ipv4.src = key->u.ipv4.src; skb_key.enc_ipv4.dst = key->u.ipv4.dst; break; case AF_INET6: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV6_ADDRS; skb_key.enc_ipv6.src = key->u.ipv6.src; skb_key.enc_ipv6.dst = key->u.ipv6.dst; break; -- cgit v1.2.3 From 9da34cd34e85aacc55af8774b81b1f23e86014f9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 28 Dec 2016 14:58:31 +0200 Subject: net/mlx5: Disable RoCE on the e-switch management port under switchdev mode Under the switchdev/offloads mode, packets that don't match any e-switch steering rule are sent towards the e-switch management port. We use a NIC HW steering rule set per vport (uplink and VFs) to make them be received into the host OS through the respective vport representor netdevice. Currently such missed RoCE packets will not get to this NIC steering rule, and hence VF RoCE will not work over the slow path of the offloads mode. This is because these packets will be matched by a steering rule added by the firmware that serves RoCE traffic set on the PF NIC vport which is also the e-switch management port under SRIOV. Disabling RoCE on the e-switch management vport when we are in the offloads mode, will signal to the firmware to remove their RoCE rule, and then the missed RoCE packets will be matched by the representor NIC steering rule as any other missed packets. To achieve that, we disable RoCE on the PF vport. We do that by removing (hot-unplugging) the IB device instance associated with the PF. 
This is also required by our current model where the PF serves as the uplink representor and hence only SW switching (TC, bridge, OVS) applications and slow path vport mlx5e net-device should be running over that vport. Fixes: c930a3ad7453 ('net/mlx5e: Add devlink based SRIOV mode changes') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 466e161010f7..03293ed1cc22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -695,6 +695,12 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto err_reps; } + + /* disable PF RoCE so missed packets don't go through RoCE steering */ + mlx5_dev_list_lock(); + mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return 0; err_reps: @@ -718,6 +724,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { -- cgit v1.2.3 From 883371c453b937f9eb581fb4915210865982736f Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 28 Dec 2016 14:58:32 +0200 Subject: net/mlx5: Check FW limitations on log_max_qp before setting it When setting HCA capabilities, set log_max_qp to be the minimum between the selected profile's value and the HCA limitation. 
Fixes: 938fe83c8dcb ('net/mlx5_core: New device capabilities...') Signed-off-by: Noa Osherovich Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 54e5a786f191..23c12f1aaa39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -503,6 +503,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, to_fw_pkey_sz(dev, 128)); + /* Check log_max_qp from HCA caps to set in current profile */ + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + profile[prof_sel].log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, prof->log_max_qp); -- cgit v1.2.3 From 689a248df83b6032edc57e86267b4e5cc8d7174e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 28 Dec 2016 14:58:33 +0200 Subject: net/mlx5: Cancel recovery work in remove flow If there is pending delayed work for health recovery it must be canceled if the device is being unloaded. Fixes: 05ac2c0b7438 ("net/mlx5: Fix race between PCI error handlers and health work") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 23c12f1aaa39..0b49739eadd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1196,6 +1196,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; + mlx5_drain_health_wq(dev); + mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1358,10 +1360,9 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain health wq */ + /* In case of kernel call save the pci state */ if (state) { pci_save_state(pdev); - mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } -- cgit v1.2.3 From d151d73dcc99de87c63bdefebcc4cb69de1cdc40 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 28 Dec 2016 14:58:34 +0200 Subject: net/mlx5: Avoid shadowing numa_node Avoid using a local variable named numa_node to avoid shadowing a public one. Fixes: db058a186f98 ('net/mlx5_core: Set irq affinity hints') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0b49739eadd3..6547f22e6b9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -582,7 +582,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int numa_node = priv->numa_node; int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { @@ -590,7 +589,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) return -ENOMEM; } - cpumask_set_cpu(cpumask_local_spread(i, numa_node), + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); -- cgit v1.2.3 From 077b1e8069b9b74477b01d28f6b83774dc19a142 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 28 Dec 2016 14:58:35 +0200 Subject: net/mlx5: Mask destination mac value in ethtool steering rules We need to mask the destination mac value with the destination mac mask when adding steering rule via ethtool. Fixes: 1174fce8d1410 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3691451c728c..d088effd7160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, } if (fs->flow_type & FLOW_MAC_EXT && !is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), fs->m_ext.h_dest); -- cgit v1.2.3 From 9b8c514291a83e53c073b473bdca6267f17a02c2 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 28 Dec 2016 14:58:36 +0200 Subject: net/mlx5: Release FTE lock in error flow Release the FTE lock when adding rule to the FTE has failed. Fixes: 0fd758d6112f ('net/mlx5: Don't unlock fte while still using it') Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a263d8904a4c..0ac7a2fc916c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1263,6 +1263,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { + unlock_ref_node(&fte->node); kfree(fte); goto unlock_fg; } -- cgit v1.2.3 From ccce1700263d8b5b219359d04180492a726cea16 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Wed, 28 Dec 2016 14:58:37 +0200 Subject: net/mlx5: Prevent setting multicast macs for VFs Need to check that VF mac address entered by the admin user is either zero or unicast mac. Multicast mac addresses are prohibited. Fixes: 77256579c6b4 ('net/mlx5: E-Switch, Introduce Vport administration functions') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d6807c3cc461..f14d9c9ba773 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1860,7 +1860,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) + if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; mutex_lock(&esw->state_lock); -- cgit v1.2.3 From 465db5dab86d6688fa5132edd1237102f4a20e84 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 28 Dec 2016 14:58:38 +0200 Subject: Revert "net/mlx5e: Expose PCIe statistics to ethtool" This reverts commit 9c7262399ba12825f3ca4b00a76d8d5e77c720f5. PCIe counters were introduced in a new firmware version, as a result users with old firmware encountered a syndrome every 200ms due to update stats work. This feature will be re-introduced later with appropriate capabilities infrastructure. Fixes: 9c7262399ba1 ("net/mlx5e: Expose PCIe statistics to ethtool") Signed-off-by: Gal Pressman Reported-by: Jesper Dangaard Brouer Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 17 ------------ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24 ---------------- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 32 +--------------------- 3 files changed, 1 insertion(+), 72 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 352462af8d51..33a399a8b5d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -171,7 +171,6 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) return NUM_SW_COUNTERS + MLX5E_NUM_Q_CNTRS(priv) + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + - NUM_PCIE_COUNTERS + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@ -219,14 +218,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc[i].format); - - for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_tas_stats_desc[i].format); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -339,14 +330,6 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, pport_2819_stats_desc, i); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc, i); - - for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_tas_counters, - pcie_tas_stats_desc, i); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; 
i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cbfa38fc72c0..be5ef036401d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -291,36 +291,12 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv) &qcnt->rx_out_of_buffer); } -static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; - struct mlx5_core_dev *mdev = priv->mdev; - int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); - void *out; - u32 *in; - - in = mlx5_vzalloc(sz); - if (!in) - return; - - out = pcie_stats->pcie_perf_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - out = pcie_stats->pcie_tas_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - kvfree(in); -} - void mlx5e_update_stats(struct mlx5e_priv *priv) { mlx5e_update_q_counter(priv); mlx5e_update_vport_counters(priv); mlx5e_update_pport_counters(priv); mlx5e_update_sw_counters(priv); - mlx5e_update_pcie_counters(priv); } void mlx5e_update_stats_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index f202f872f57f..ba5db1dd23a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -39,7 +39,7 @@ #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ (*(u32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ - be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) 
#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) @@ -276,32 +276,6 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; -#define PCIE_PERF_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_PERF_GET(pcie_stats, c) \ - MLX5_GET(mpcnt_reg, pcie_stats->pcie_perf_counters, \ - counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_TAS_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_tas_cntrs_grp_data_layout.c) -#define PCIE_TAS_GET(pcie_stats, c) \ - MLX5_GET(mpcnt_reg, pcie_stats->pcie_tas_counters, \ - counter_set.pcie_tas_cntrs_grp_data_layout.c) - -struct mlx5e_pcie_stats { - __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; - __be64 pcie_tas_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; -}; - -static const struct counter_desc pcie_perf_stats_desc[] = { - { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, - { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, -}; - -static const struct counter_desc pcie_tas_stats_desc[] = { - { "tx_pci_transport_nonfatal_msg", PCIE_TAS_OFF(non_fatal_err_msg_sent) }, - { "tx_pci_transport_fatal_msg", PCIE_TAS_OFF(fatal_err_msg_sent) }, -}; - struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -386,8 +360,6 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) -#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) -#define NUM_PCIE_TAS_COUNTERS ARRAY_SIZE(pcie_tas_stats_desc) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ @@ -397,7 +369,6 @@ static const struct counter_desc sq_stats_desc[] = { NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ 
NUM_PPORT_PRIO) -#define NUM_PCIE_COUNTERS (NUM_PCIE_PERF_COUNTERS + NUM_PCIE_TAS_COUNTERS) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) @@ -406,7 +377,6 @@ struct mlx5e_stats { struct mlx5e_qcounter_stats qcnt; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; - struct mlx5e_pcie_stats pcie; struct rtnl_link_stats64 vf_vport; }; -- cgit v1.2.3 From 1efbd205b3cc5882a8c386c58a57134044e9d5ba Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 28 Dec 2016 14:58:39 +0200 Subject: Revert "net/mlx5: Add MPCNT register infrastructure" This reverts commit 7f503169cabd70c1f13b9279c50eca7dfb9a7d51. Fixes: 7f503169cabd ("net/mlx5: Add MPCNT register infrastructure") Signed-off-by: Gal Pressman Reported-by: Jesper Dangaard Brouer Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 5 --- include/linux/mlx5/driver.h | 1 - include/linux/mlx5/mlx5_ifc.h | 93 ------------------------------------------- 3 files changed, 99 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9f489365b3d3..52b437431c6a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1071,11 +1071,6 @@ enum { MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; -enum { - MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, - MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, -}; - static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ae55361e674..735b36335f29 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -123,7 +123,6 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, - MLX5_REG_MPCNT = 0x9051, }; enum mlx5_dcbx_oper_mode { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57bec544e20a..a852e9db6f0d 100644 --- a/include/linux/mlx5/mlx5_ifc.h 
+++ b/include/linux/mlx5/mlx5_ifc.h @@ -1757,80 +1757,6 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 reserved_at_4c0[0x300]; }; -struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 rx_errors[0x20]; - - u8 tx_errors[0x20]; - - u8 l0_to_recovery_eieos[0x20]; - - u8 l0_to_recovery_ts[0x20]; - - u8 l0_to_recovery_framing[0x20]; - - u8 l0_to_recovery_retrain[0x20]; - - u8 crc_error_dllp[0x20]; - - u8 crc_error_tlp[0x20]; - - u8 reserved_at_140[0x680]; -}; - -struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 time_to_boot_image_start[0x20]; - - u8 time_to_link_image[0x20]; - - u8 calibration_time[0x20]; - - u8 time_to_first_perst[0x20]; - - u8 time_to_detect_state[0x20]; - - u8 time_to_l0[0x20]; - - u8 time_to_crs_en[0x20]; - - u8 time_to_plastic_image_start[0x20]; - - u8 time_to_iron_image_start[0x20]; - - u8 perst_handler[0x20]; - - u8 times_in_l1[0x20]; - - u8 times_in_l23[0x20]; - - u8 dl_down[0x20]; - - u8 config_cycle1usec[0x20]; - - u8 config_cycle2to7usec[0x20]; - - u8 config_cycle_8to15usec[0x20]; - - u8 config_cycle_16_to_63usec[0x20]; - - u8 config_cycle_64usec[0x20]; - - u8 correctable_err_msg_sent[0x20]; - - u8 non_fatal_err_msg_sent[0x20]; - - u8 fatal_err_msg_sent[0x20]; - - u8 reserved_at_2e0[0x4e0]; -}; - struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; @@ -2995,12 +2921,6 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { u8 reserved_at_0[0x7c0]; }; -union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits { - struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout; - struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout; - u8 reserved_at_0[0x7c0]; -}; - union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; @@ -7320,18 
+7240,6 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; -struct mlx5_ifc_mpcnt_reg_bits { - u8 reserved_at_0[0x8]; - u8 pcie_index[0x8]; - u8 reserved_at_10[0xa]; - u8 grp[0x6]; - - u8 clr[0x1]; - u8 reserved_at_21[0x1f]; - - union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set; -}; - struct mlx5_ifc_ppad_reg_bits { u8 reserved_at_0[0x3]; u8 single_mac[0x1]; @@ -7937,7 +7845,6 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; - struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; struct mlx5_ifc_ppsc_reg_bits ppsc_reg; -- cgit v1.2.3 From 4525a45bfad55a00ef218c5fbe5d98a3d8170bf5 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 28 Dec 2016 14:58:40 +0200 Subject: net/mlx5e: Check ets capability before initializing ets settings During the initial setup, the ets command is sent to firmware without checking if the HCA supports ets. This causes the invalid command error. Add the ets capability check before sending firmware command to initialize ets settings. Fixes: e207b7e99176 ("net/mlx5e: ConnectX-4 firmware support for DCBX") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 7f6c225666c1..f0b460f47f29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -723,6 +723,9 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) int i; struct ieee_ets ets; + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return; + memset(&ets, 0, sizeof(ets)); ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; for (i = 0; i < ets.ets_cap; i++) { -- cgit v1.2.3 From 610e89e05c3f28a7394935aa6b91f99548c4fd3c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 28 Dec 2016 14:58:41 +0200 Subject: net/mlx5e: Don't sync netdev state when not registered Skip setting netdev vxlan ports and netdev rx_mode on driver load when netdev is not yet registered. Synchronizing with netdev state is needed only on reset flow where the netdev remains registered for the whole reset period. This also fixes an access before initialization of net_device.addr_list_lock - which for some reason initialized on register_netdev - where we queued set_rx_mode work on driver load before netdev registration. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reported-by: Sebastian Ott Reviewed-by: Mohamad Haj Yahia Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index be5ef036401d..cf270f6c90e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3781,14 +3781,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - udp_tunnel_get_rx_info(netdev); - rtnl_unlock(); - } - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); @@ -3798,6 +3791,18 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) rep.netdev = netdev; mlx5_eswitch_register_vport_rep(esw, 0, &rep); } + + if (netdev->reg_state != NETREG_REGISTERED) + return; + + /* Device already registered: sync netdev system state */ + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); + } + + queue_work(priv->wq, &priv->set_rx_mode_work); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) -- cgit v1.2.3 From 37f304d10030bb425c19099e7b955d9c3ec4cba3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 28 Dec 2016 14:58:42 +0200 Subject: net/mlx5e: Disable netdev after close Disable netdev should come after it was closed, although no harm of doing it before -hence the MLX5E_STATE_DESTROYING bit- but it is more natural this way. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reviewed-by: Mohamad Haj Yahia Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cf270f6c90e8..1236b27b1493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3947,10 +3947,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - if (profile->disable) - profile->disable(priv); - - flush_workqueue(priv->wq); rtnl_lock(); if (netif_running(netdev)) @@ -3958,6 +3954,10 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) netif_device_detach(netdev); rtnl_unlock(); + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); -- cgit v1.2.3 From 9dd0f896d2cc5815d859e945db90915071cd44b3 Mon Sep 17 00:00:00 2001 From: Augusto Mecking Caringi Date: Wed, 28 Dec 2016 16:02:05 +0000 Subject: net: atm: Fix warnings in net/atm/lec.c when !CONFIG_PROC_FS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following warnings when CONFIG_PROC_FS is not set: linux/net/atm/lec.c: In function ‘lane_module_cleanup’: linux/net/atm/lec.c:1062:27: error: ‘atm_proc_root’ undeclared (first use in this function) remove_proc_entry("lec", atm_proc_root); ^ linux/net/atm/lec.c:1062:27: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Augusto Mecking Caringi Signed-off-by: David S. 
Miller --- net/atm/lec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/atm/lec.c b/net/atm/lec.c index 019557d0a11d..09cfe87f0a44 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1059,7 +1059,9 @@ static void __exit lane_module_cleanup(void) { int i; +#ifdef CONFIG_PROC_FS remove_proc_entry("lec", atm_proc_root); +#endif deregister_atm_ioctl(&lane_ioctl_ops); -- cgit v1.2.3 From 60133867f1f111aaf3a8c00375b8026142a9a591 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Dec 2016 16:44:23 +0000 Subject: net: wan: slic_ds26522: fix spelling mistake: "configurated" -> "configured" trivial fix to spelling mistake in pr_info message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wan/slic_ds26522.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index b776a0ab106c..9d9b4e0def2a 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -218,7 +218,7 @@ static int slic_ds26522_probe(struct spi_device *spi) ret = slic_ds26522_init_configure(spi); if (ret == 0) - pr_info("DS26522 cs%d configurated\n", spi->chip_select); + pr_info("DS26522 cs%d configured\n", spi->chip_select); return ret; } -- cgit v1.2.3 From e4c5e13aa45c23692e4acf56f0b3533f328199b2 Mon Sep 17 00:00:00 2001 From: Zheng Li Date: Wed, 28 Dec 2016 23:23:46 +0800 Subject: ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output There is an inconsistent conditional judgement between __ip6_append_data and ip6_finish_output functions, the variable length in __ip6_append_data just include the length of application's payload and udp6 header, don't include the length of ipv6 header, but in ip6_finish_output use (skb->len > ip6_skb_dst_mtu(skb)) as judgement, and skb->len include the length of ipv6 header. 
That causes some particular application's udp6 payloads whose length are between (MTU - IPv6 Header) and MTU were fragmented by ip6_fragment even though the rst->dev support UFO feature. Add the length of ipv6 header to length in __ip6_append_data to keep consistent conditional judgement as ip6_finish_output for ip6 fragment. Signed-off-by: Zheng Li Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 70d0de404197..38122d04fadc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1373,7 +1373,7 @@ emsgsize: */ cork->length += length; - if (((length > mtu) || + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && -- cgit v1.2.3 From b2eb09af7370fedc6b9d9f05762f01625438467a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 28 Dec 2016 15:44:41 -0800 Subject: net: stmmac: Fix error path after register_netdev move Commit 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and stmmac_open") re-ordered how the MDIO bus registration and the network device are registered, but missed to unwind the MDIO bus registration in case we fail to register the network device. Fixes: 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and stmmac_open") Signed-off-by: Florian Fainelli Acked-by: Kweh, Hock Leong Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5910ea51f8f6..39eb7a65bb9f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3366,12 +3366,19 @@ int stmmac_dvr_probe(struct device *device, } ret = register_netdev(ndev); - if (ret) + if (ret) { netdev_err(priv->dev, "%s: ERROR %i registering the device\n", __func__, ret); + goto error_netdev_register; + } return ret; +error_netdev_register: + if (priv->hw->pcs != STMMAC_PCS_RGMII && + priv->hw->pcs != STMMAC_PCS_TBI && + priv->hw->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); error_mdio_register: netif_napi_del(&priv->napi); error_hw_init: -- cgit v1.2.3 From b91e1302ad9b80c174a4855533f7e3aa2873355e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 Dec 2016 11:40:38 -0800 Subject: mm: optimize PageWaiters bit use for unlock_page() In commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Nick Piggin made our page locking no longer unconditionally touch the hashed page waitqueue, which not only helps performance in general, but is particularly helpful on NUMA machines where the hashed wait queues can bounce around a lot. However, the "clear lock bit atomically and then test the waiters bit" sequence turns out to be much more expensive than it needs to be, because you get a nasty stall when trying to access the same word that just got updated atomically. On architectures where locking is done with LL/SC, this would be trivial to fix with a new primitive that clears one bit and tests another atomically, but that ends up not working on x86, where the only atomic operations that return the result end up being cmpxchg and xadd. 
The atomic bit operations return the old value of the same bit we changed, not the value of an unrelated bit. On x86, we could put the lock bit in the high bit of the byte, and use "xadd" with that bit (where the overflow ends up not touching other bits), and look at the other bits of the result. However, an even simpler model is to just use a regular atomic "and" to clear the lock bit, and then the sign bit in eflags will indicate the resulting state of the unrelated bit #7. So by moving the PageWaiters bit up to bit #7, we can atomically clear the lock bit and test the waiters bit on x86 too. And on architectures with LL/SC (which is all the usual RISC suspects), the particular bit doesn't matter, so they are fine with this approach too. This avoids the extra access to the same atomic word, and thus avoids the costly stall at page unlock time. The only downside is that the interface ends up being a bit odd and specialized: clear a bit in a byte, and test the sign bit. Nick doesn't love the resulting name of the new primitive, but I'd rather make the name be descriptive and very clear about the limitation imposed by trying to work across all relevant architectures than make it be some generic thing that doesn't make the odd semantics explicit. So this introduces the new architecture primitive clear_bit_unlock_is_negative_byte(); and adds the trivial implementation for x86. We have a generic non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)" combination) which can be overridden by any architecture that can do better. According to Nick, Power has the same hiccup x86 has, for example, but some other architectures may not even care. All these optimizations mean that my page locking stress-test (which is just executing a lot of small short-lived shell scripts: "make test" in the git source tree) no longer makes our page locking look horribly bad.
Before all these optimizations, just the unlock_page() costs were just over 3% of all CPU overhead on "make test". After this, it's down to 0.66%, so just a quarter of the cost it used to be. (The difference on NUMA is bigger, but there this micro-optimization is likely less noticeable, since the big issue on NUMA was not the accesses to 'struct page', but the waitqueue accesses that were already removed by Nick's earlier commit). Acked-by: Nick Piggin Cc: Dave Hansen Cc: Bob Peterson Cc: Steven Whitehouse Cc: Andrew Lutomirski Cc: Andreas Gruenbacher Cc: Peter Zijlstra Cc: Mel Gorman Signed-off-by: Linus Torvalds --- arch/x86/include/asm/bitops.h | 13 +++++++++++++ include/linux/page-flags.h | 2 +- mm/filemap.c | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 68557f52b961..854022772c5b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } +static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +{ + bool negative; + asm volatile(LOCK_PREFIX "andb %2,%1\n\t" + CC_SET(s) + : CC_OUT(s) (negative), ADDR + : "ir" ((char) ~(1 << nr)) : "memory"); + return negative; +} + +// Let everybody know we have it +#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte + /* * __clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c56b39890a41..6b5818d6de32 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -73,13 +73,13 @@ */ enum pageflags { PG_locked, /* Page is locked. Don't touch. 
*/ - PG_waiters, /* Page has waiters, check its waitqueue */ PG_error, PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_active, + PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ PG_arch_1, diff --git a/mm/filemap.c b/mm/filemap.c index 82f26cde830c..6b1d96f86a9c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter) } EXPORT_SYMBOL_GPL(add_page_wait_queue); +#ifndef clear_bit_unlock_is_negative_byte + +/* + * PG_waiters is the high bit in the same byte as PG_lock. + * + * On x86 (and on many other architectures), we can clear PG_lock and + * test the sign bit at the same time. But if the architecture does + * not support that special operation, we just do this all by hand + * instead. + * + * The read of PG_waiters has to be after (or concurrently with) PG_locked + * being cleared, but a memory barrier should be unneccssary since it is + * in the same byte as PG_locked. + */ +static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) +{ + clear_bit_unlock(nr, mem); + /* smp_mb__after_atomic(); */ + return test_bit(PG_waiters); +} + +#endif + /** * unlock_page - unlock a locked page * @page: the page @@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * - * The mb is necessary to enforce ordering between the clear_bit and the read - * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). + * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). 
That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). */ void unlock_page(struct page *page) { + BUILD_BUG_ON(PG_waiters != 7); page = compound_head(page); VM_BUG_ON_PAGE(!PageLocked(page), page); - clear_bit_unlock(PG_locked, &page->flags); - smp_mb__after_atomic(); - wake_up_page(page, PG_locked); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); } EXPORT_SYMBOL(unlock_page); -- cgit v1.2.3 From 4775cc1f2d5abca894ac32774eefc22c45347d1c Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 28 Dec 2016 17:52:15 +0100 Subject: rtnl: stats - add missing netlink message size checks We miss to check if the netlink message is actually big enough to contain a struct if_stats_msg. Add a check to prevent userland from sending us short messages that would make us access memory beyond the end of the message. Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump...") Signed-off-by: Mathias Krause Cc: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 18b5aae99bec..75e3ea7bda08 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3898,6 +3898,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) u32 filter_mask; int err; + if (nlmsg_len(nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); @@ -3947,6 +3950,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; + if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(cb->nlh); filter_mask = ifsm->filter_mask; if (!filter_mask) -- cgit v1.2.3 From f0c16ba8933ed217c2688b277410b2a37ba81591 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Thu, 29 Dec 2016 16:45:04 +0800 Subject: net: fix incorrect original ingress device index in PKTINFO When we send a packet for our own local address on a non-loopback interface (e.g. eth0), due to the change had been introduced from commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the original ingress device index would be set as the loopback interface. However, the packet should be considered as if it is being arrived via the sending interface (eth0), otherwise it would break the expectation of the userspace application (e.g. the DHCPRELEASE message from dhcp_release binary would be ignored by the dnsmasq daemon, since it come from lo which is not the interface dnsmasq bind to) Fixes: 0b922b7a829c ("net: original ingress device index in PKTINFO") Acked-by: David Ahern Signed-off-by: Wei Zhang Signed-off-by: David S. 
Miller --- net/ipv4/ip_sockglue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 57e1405e8282..53ae0c6315ad 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1225,8 +1225,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. This code then overlays * PKTINFO_SKB_CB and in_pktinfo also has iif as the first - * element so the iif is picked up from the prior IPCB + * element so the iif is picked up from the prior IPCB. If iif + * is the loopback interface, then return the sending interface + * (e.g., process binds socket to eth0 for Tx which is + * redirected to loopback in the rtable/dst). */ + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) + pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; -- cgit v1.2.3 From 3b01fe7f91c8e4f9afc4fae3c5af72c14958d2d8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:09 +0200 Subject: net/mlx4_core: Use-after-free causes a resource leak in flow-steering detach mlx4_QP_FLOW_STEERING_DETACH_wrapper first removes the steering rule (which results in freeing the rule structure), and then references a field in this struct (the qp number) when releasing the busy-status on the rule's qp. Since this memory was freed, it could be reallocated and changed. Therefore, the qp number in the struct may be incorrect, so that we are releasing the incorrect qp. This leaves the rule's qp in the busy state (and could possibly release an incorrect qp as well). Fix this by saving the qp number in a local variable, for use after removing the steering rule. Fixes: 2c473ae7e582 ("net/mlx4_core: Disallow releasing VF QPs which have steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index c548beaaf910..4b3e139e9c82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4473,6 +4473,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct res_qp *rqp; struct res_fs_rule *rrule; u64 mirr_reg_id; + int qpn; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4489,10 +4490,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, } mirr_reg_id = rrule->mirr_rule_id; kfree(rrule->mirr_mbox); + qpn = rrule->qpn; /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); - err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; @@ -4517,7 +4519,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, if (!err) atomic_dec(&rqp->ref_count); out: - put_res(dev, slave, rrule->qpn, RES_QP); + put_res(dev, slave, qpn, RES_QP); return err; } -- cgit v1.2.3 From 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 29 Dec 2016 18:37:10 +0200 Subject: net/mlx4_en: Fix bad WQE issue Single send WQE in RX buffer should be stamped with software ownership in order to prevent the flow of QP in error in FW once UPDATE_QP is called. Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues') Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 3c37e216bbf3..eac527e25ec9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -445,8 +445,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; -- cgit v1.2.3 From c1d5f8ff80ea84768f5fae1ca9d1abfbb5e6bbaa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Dec 2016 18:37:11 +0200 Subject: net/mlx4: Remove BUG_ON from ICM allocation routine This patch removes BUG_ON() macro from mlx4_alloc_icm_coherent() by checking DMA address alignment in advance and performing proper folding in case of error. Fixes: 5b0bf5e25efe ("mlx4_core: Support ICM tables in coherent memory") Reported-by: Ozgur Karatas Signed-off-by: Leon Romanovsky Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 2a9dd460a95f..e1f9e7cebf8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } -- cgit v1.2.3 From 61b6034c6cfdcb265bb453505c3d688e7567727a Mon Sep 17 00:00:00 2001 From: Slava Shwartsman Date: Thu, 29 Dec 2016 18:37:12 +0200 Subject: net/mlx4_en: Fix type mismatch for 32-bit systems is_power_of_2 expects unsigned long and we pass u64 max_val_cycles, this will be truncated on 32 bit systems, and the result is not what we were expecting. div_u64 expects u32 as a second argument and we pass max_val_cycles_rounded which is u64 hence it will always be truncated. Fix was tested on both 64 and 32 bit systems and got same results for max_val_cycles and max_val_cycles_rounded. Fixes: 4850cf458157 ("net/mlx4_en: Resolve dividing by zero in 32-bit system") Signed-off-by: Slava Shwartsman Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 015198c14fa8..504461a464c5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -245,13 +245,9 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; - u64 tmp_rounded = - roundup_pow_of_two(max_val_cycles) > max_val_cycles ? - roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; - u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : tmp_rounded; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ - u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); /* This comes from the reverse of clocksource_khz2mult */ return ilog2(div_u64(max_mul * freq_khz, 1000000)); -- cgit v1.2.3 From 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:13 +0200 Subject: net/mlx4_core: Fix raw qp flow steering rules under SRIOV Demoting simple flow steering rule priority (for DPDK) was achieved by wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper function for the PF and all VFs. In function mlx4_ib_create_flow(), this change caused the main rule creation for the PF to be wrapped, while it left the associated tunnel steering rule creation unwrapped for the PF. 
This mismatch caused rule deletion failures in mlx4_ib_destroy_flow() for the PF when the detach wrapper function did not find the associated tunnel-steering rule (since creation of that rule for the PF did not go through the wrapper function). Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native" (so that the PF invocation does not go through the wrapper), and perform the required priority demotion for the PF in the mlx4_ib_create_flow() code path. Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 14 ++++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 18 ++++++++++++++++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 22 +--------------------- include/linux/mlx4/device.h | 2 ++ 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c8413fc120e6..7031a8dd4d14 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1682,9 +1682,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att size += ret; } + if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR && + flow_attr->num_of_specs == 1) { + struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1); + enum ib_flow_spec_type header_spec = + ((union ib_flow_spec *)(flow_attr + 1))->type; + + if (header_spec == IB_FLOW_SPEC_ETH) + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); + } + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (ret == -ENOMEM) pr_err("mcg table is full. 
Fail to register network rule.\n"); else if (ret == -ENXIO) @@ -1701,7 +1711,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", reg_id); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5e7840a7a33b..bffa6f345f2f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4b3e139e9c82..56185a0b827d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4164,22 +4164,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } -static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, 
- struct _rule_hw *eth_header) -{ - if (is_multicast_ether_addr(eth_header->eth.dst_mac) || - is_broadcast_ether_addr(eth_header->eth.dst_mac)) { - struct mlx4_net_trans_rule_hw_eth *eth = - (struct mlx4_net_trans_rule_hw_eth *)eth_header; - struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); - bool last_rule = next_rule->size == 0 && next_rule->id == 0 && - next_rule->rsvd == 0; - - if (last_rule) - ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); - } -} - /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. @@ -4363,10 +4347,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) - handle_eth_header_mcast_prio(ctrl, rule_header); - - if (slave == dev->caps.function) - goto execute; + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -4394,7 +4375,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put_qp; } -execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 93bdb3485192..6533c16e27ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -- cgit 
v1.2.3 From 41744213602a206f24adcb4a2b7551db3c700e72 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 26 Dec 2016 12:46:01 +0100 Subject: parisc: Mark cr16 clocksource unstable on SMP systems The cr16 interval timer of each CPU is not synchronized to other cr16 timers in other CPUs in a SMP system. So, delay the registration of the cr16 clocksource until all CPUs have been detected and then - if we are on a SMP machine - mark the cr16 clocksource as unstable and lower its rating before registering it at the clocksource framework. This patch fixes the stalled CPU warnings which we have seen since introduction of the cr16 clocksource. Signed-off-by: Helge Deller Cc: # v4.8+ --- arch/parisc/kernel/time.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index da0d9cb63403..1e22f981cd81 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -235,9 +235,26 @@ void __init time_init(void) cr16_hz = 100 * PAGE0->mem_10msec; /* Hz */ - /* register at clocksource framework */ - clocksource_register_hz(&clocksource_cr16, cr16_hz); - /* register as sched_clock source */ sched_clock_register(read_cr16_sched_clock, BITS_PER_LONG, cr16_hz); } + +static int __init init_cr16_clocksource(void) +{ + /* + * The cr16 interval timers are not syncronized across CPUs, so mark + * them unstable and lower rating on SMP systems.
+ */ + if (num_online_cpus() > 1) { + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; + } + + /* register at clocksource framework */ + clocksource_register_hz(&clocksource_cr16, + 100 * PAGE0->mem_10msec); + + return 0; +} + +device_initcall(init_cr16_clocksource); -- cgit v1.2.3 From 1fe0a7e0bc52024a445945c9e7691551aba97390 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 27 Dec 2016 18:03:50 +0100 Subject: parisc: Drop TIF_RESTORE_SIGMASK and switch to generic code Commit 7e7814180b33 ("signal: consolidate {TS,TLF}_RESTORE_SIGMASK code") introduced code with which the "restore sigmask" flag lives in task_struct instead of ti->flags. Let's use this optimization on parisc too. Signed-off-by: Helge Deller --- arch/parisc/include/asm/thread_info.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 7581330ea35b..88fe0aad4390 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -49,7 +49,6 @@ struct thread_info { #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ #define TIF_SINGLESTEP 9 /* single stepping? 
*/ -- cgit v1.2.3 From 98473f9f3f9bd404873cd1178c8be7d6d619f0d1 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 29 Dec 2016 14:16:07 -0800 Subject: mm/filemap: fix parameters to test_bit() mm/filemap.c: In function 'clear_bit_unlock_is_negative_byte': mm/filemap.c:933:9: error: too few arguments to function 'test_bit' return test_bit(PG_waiters); ^~~~~~~~ Fixes: b91e1302ad9b ('mm: optimize PageWaiters bit use for unlock_page()') Signed-off-by: Olof Johansson Brown-paper-bag-by: Linus Torvalds Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 6b1d96f86a9c..d0e4d1002059 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -930,7 +930,7 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem { clear_bit_unlock(nr, mem); /* smp_mb__after_atomic(); */ - return test_bit(PG_waiters); + return test_bit(PG_waiters, mem); } #endif -- cgit v1.2.3 From 2344ef3c86a7fe41f97bf66c7936001b6132860b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 30 Dec 2016 00:07:38 +0300 Subject: sh_eth: fix branch prediction in sh_eth_interrupt() IIUC, likely()/unlikely() should apply to the whole *if* statement's expression, not a part of it -- fix such expression in sh_eth_interrupt() accordingly... Fixes: 283e38db65e7 ("sh_eth: Fix serialisation of interrupt disable with interrupt & NAPI handlers") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f341c1bc7001..0af7fc279c85 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1656,7 +1656,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev) else goto out; - if (!likely(mdp->irq_enabled)) { + if (unlikely(!mdp->irq_enabled)) { sh_eth_write(ndev, 0, EESIPR); goto out; } -- cgit v1.2.3 From f5a0aab84b74de68523599817569c057c7ac1622 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 29 Dec 2016 15:29:03 -0800 Subject: net: ipv4: dst for local input routes should use l3mdev if relevant IPv4 output routes already use l3mdev device instead of loopback for dst's if it is applicable. Change local input routes to do the same. This fixes icmp responses for unreachable UDP ports which are directed to the wrong table after commit 9d1a6c4ea43e4 because local_input routes use the loopback device. Moving from ingress device to loopback loses the L3 domain causing responses based on the dst to get to lost. Fixes: 9d1a6c4ea43e4 ("net: icmp_route_lookup should use rt dev to determine L3 domain") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a82a11747b3f..0fcac8e7a2b2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1914,7 +1914,8 @@ local_input: } } - rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, + rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? 
: net->loopback_dev, + flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; -- cgit v1.2.3 From 42d97eb0ade31e1bc537d086842f5d6e766d9d51 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Dec 2016 14:20:13 -0800 Subject: fscrypt: fix renaming and linking special files Attempting to link a device node, named pipe, or socket file into an encrypted directory through rename(2) or link(2) always failed with EPERM. This happened because fscrypt_has_permitted_context() saw that the file was unencrypted and forbid creating the link. This behavior was unexpected because such files are never encrypted; only regular files, directories, and symlinks can be encrypted. To fix this, make fscrypt_has_permitted_context() always return true on special files. This will be covered by a test in my encryption xfstests patchset. Fixes: 9bd8212f981e ("ext4 crypto: add encryption policy and password salt support") Signed-off-by: Eric Biggers Reviewed-by: Richard Weinberger Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o --- fs/crypto/policy.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 6ed7c2eebeec..d6cd7ea4851d 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -179,6 +179,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) BUG_ON(1); } + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!parent->i_sb->s_cop->is_encrypted(parent)) return 1; -- cgit v1.2.3 From e1a3a60a2ebe991605acb14cd58e39c0545e174e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 30 Dec 2016 17:42:32 -0600 Subject: net: socket: don't set sk_uid to garbage value in ->setattr() ->setattr() was recently implemented for socket files to sync the socket inode's uid to the new 
'sk_uid' member of struct sock. It does this by copying over the ia_uid member of struct iattr. However, ia_uid is actually only valid when ATTR_UID is set in ia_valid, indicating that the uid is being changed, e.g. by chown. Other metadata operations such as chmod or utimes leave ia_uid uninitialized. Therefore, sk_uid could be set to a "garbage" value from the stack. Fix this by only copying the uid over when ATTR_UID is set. Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Eric Biggers Tested-by: Lorenzo Colitti Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 8487bf136e5c..a8c2307590b8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -537,7 +537,7 @@ int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) { int err = simple_setattr(dentry, iattr); - if (!err) { + if (!err && (iattr->ia_valid & ATTR_UID)) { struct socket *sock = SOCKET_I(d_inode(dentry)); sock->sk->sk_uid = iattr->ia_uid; -- cgit v1.2.3 From 0c744ea4f77d72b3dcebb7a8f2684633ec79be88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Jan 2017 14:31:53 -0800 Subject: Linux 4.10-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ec411ba9e40f..5470d599384a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Roaring Lionus # *DOCUMENTATION* -- cgit v1.2.3 From 086cc1c31a0ec075dac02425367c871bb65bc2c9 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Wed, 14 Dec 2016 21:27:57 +0900 Subject: openrisc: Add _text symbol to fix ksym build error The build robot reports: .tmp_kallsyms1.o: In function `kallsyms_relative_base': >> (.rodata+0x8a18): undefined reference to `_text' This is when using 'make alldefconfig'. 
Adding this _text symbol to mark the start of the kernel as in other architectures fixes this. Signed-off-by: Stafford Horne Acked-by: Jonas Bonn --- arch/openrisc/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index ef31fc24344e..552544616b9d 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -44,6 +44,8 @@ SECTIONS /* Read-only sections, merged into text segment: */ . = LOAD_BASE ; + _text = .; + /* _s_kernel_ro must be page aligned */ . = ALIGN(PAGE_SIZE); _s_kernel_ro = .; -- cgit v1.2.3 From 4200462d88f47f3759bdf4705f87e207b0f5b2e4 Mon Sep 17 00:00:00 2001 From: Reiter Wolfgang Date: Sat, 31 Dec 2016 21:11:57 +0100 Subject: drop_monitor: add missing call to genlmsg_end Update nlmsg_len field with genlmsg_end to enable userspace processing using nlmsg_next helper. Also adds error handling. Signed-off-by: Reiter Wolfgang Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/core/drop_monitor.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 8e0c0635ee97..f465bad2ef2c 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -75,6 +75,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) struct nlattr *nla; struct sk_buff *skb; unsigned long flags; + void *msg_header; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -82,17 +83,31 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) skb = genlmsg_new(al, GFP_KERNEL); - if (skb) { - genlmsg_put(skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(skb, NLA_UNSPEC, - sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - } else { - mod_timer(&data->send_timer, jiffies + HZ / 10); + if (!skb) + goto err; + + msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + if (!msg_header) { + nlmsg_free(skb); + skb = NULL; + goto err; + } + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + if (!nla) { + nlmsg_free(skb); + skb = NULL; + goto err; } + msg = nla_data(nla); + memset(msg, 0, al); + genlmsg_end(skb, msg_header); + goto out; +err: + mod_timer(&data->send_timer, jiffies + HZ / 10); +out: spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); -- cgit v1.2.3 From 97b84fd6d91766ea57dcc350d78f42639e011c30 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Dec 2016 19:48:19 +0100 Subject: l2tp: consider '::' as wildcard address in l2tp_ip6 socket lookup An L2TP socket bound to the unspecified address should match with any address. If not, it can't receive any packet and __l2tp_ip6_bind_lookup() can't prevent another socket from binding on the same device/tunnel ID. 
While there, rename the 'addr' variable to 'sk_laddr' (local addr), to make following patch clearer. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index f092ac441fdd..3135b9d55df5 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -64,7 +64,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { - const struct in6_addr *addr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -72,7 +72,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && - (!addr || ipv6_addr_equal(addr, laddr)) && + (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; -- cgit v1.2.3 From a9b2dff80be979432484afaf7f8d8e73f9e8838a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Dec 2016 19:48:20 +0100 Subject: l2tp: take remote address into account in l2tp_ip and l2tp_ip6 socket lookups For connected sockets, __l2tp_ip{,6}_bind_lookup() needs to check the remote IP when looking for a matching socket. Otherwise a connected socket can receive traffic not originating from its peer. Drop l2tp_ip_bind_lookup() and l2tp_ip6_bind_lookup() instead of updating their prototype, as these functions aren't used. Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_ip.c | 19 ++++++------------- net/l2tp/l2tp_ip6.c | 20 ++++++-------------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 8938b6ba57a0..3d73278b86ca 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -47,7 +47,8 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) return (struct l2tp_ip_sock *)sk; } -static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) +static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr, + __be32 raddr, int dif, u32 tunnel_id) { struct sock *sk; @@ -61,6 +62,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && + (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -71,15 +73,6 @@ found: return sk; } -static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. 
Control frames consist of a regular L2TP header @@ -183,8 +176,8 @@ pass_up: struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb), - tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, + inet_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip_lock); goto discard; @@ -280,7 +273,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ write_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0, sk->sk_bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip_lock); ret = -EADDRINUSE; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 3135b9d55df5..331ccf5a7bad 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -59,12 +59,14 @@ static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk) static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, + const struct in6_addr *raddr, int dif, u32 tunnel_id) { struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_raddr = &sk->sk_v6_daddr; struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -73,6 +75,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) && + (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -83,17 +86,6 @@ found: return sk; } -static inline struct sock *l2tp_ip6_bind_lookup(struct net *net, - struct in6_addr *laddr, - int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - 
sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header @@ -197,8 +189,8 @@ pass_up: struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb), - tunnel_id); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip6_lock); goto discard; @@ -330,7 +322,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) rcu_read_unlock(); write_lock_bh(&l2tp_ip6_lock); - if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if, + if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip6_lock); err = -EADDRINUSE; -- cgit v1.2.3 From 35f432a03e41d3bf08c51ede917f94e2288fbe8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 2 Jan 2017 11:19:29 +0100 Subject: mac80211: initialize fast-xmit 'info' later In ieee80211_xmit_fast(), 'info' is initialized to point to the skb that's passed in, but that skb may later be replaced by a clone (if it was shared), leading to an invalid pointer. This can lead to use-after-free and also later crashes since the real SKB's info->hw_queue doesn't get initialized properly. Fix this by assigning info only later, when it's needed, after the skb replacement (may have) happened. 
Cc: stable@vger.kernel.org Reported-by: Ben Greear Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2c21b7039136..0d8b716e509e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3287,7 +3287,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); int hw_headroom = sdata->local->hw.extra_tx_headroom; struct ethhdr eth; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; struct ieee80211_tx_data tx; ieee80211_tx_result r; @@ -3351,6 +3351,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); info->band = fast_tx->band; info->control.vif = &sdata->vif; -- cgit v1.2.3 From f83f90cf7ba68deb09406ea9da80852a64c4db29 Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Fri, 23 Dec 2016 12:50:13 +0000 Subject: HID: usbhid: Add quirk for the Futaba TOSD-5711BB VFD The Futaba TOSD-5711BB VFD crashes when the initial HID report is requested, register the display in hid-ids and tell hid-quirks to not do the init. 
Signed-off-by: Alex Wood Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ec277b96eaa1..62b03d53ecd1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -365,6 +365,9 @@ #define USB_VENDOR_ID_FLATFROG 0x25b5 #define USB_DEVICE_ID_MULTITOUCH_3200 0x0002 +#define USB_VENDOR_ID_FUTABA 0x0547 +#define USB_DEVICE_ID_LED_DISPLAY 0x7000 + #define USB_VENDOR_ID_ESSENTIAL_REALITY 0x0d7f #define USB_DEVICE_ID_ESSENTIAL_REALITY_P5 0x0100 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index b3e01c82af05..6fd49788fbe6 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -88,6 +88,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, -- cgit v1.2.3 From 8aa2cc7e747881d1fd52db28261b201d4e3e5565 Mon Sep 17 00:00:00 2001 From: Marcel Hasler Date: Tue, 20 Dec 2016 22:08:13 +0100 Subject: HID: usbhid: Add quirk for Mayflash/Dragonrise DolphinBar. The DolphinBar by Mayflash (identified as Dragonrise) needs HID_QUIRK_MULTI_INPUT to split it up into four input devices. Without this quirk the adapter is falsely recognized as a tablet. See also bug 115841 (https://bugzilla.kernel.org/show_bug.cgi?id=115841). 
Signed-off-by: Marcel Hasler Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 62b03d53ecd1..54bd22dc1411 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -319,6 +319,7 @@ #define USB_VENDOR_ID_DRAGONRISE 0x0079 #define USB_DEVICE_ID_DRAGONRISE_WIIU 0x1800 #define USB_DEVICE_ID_DRAGONRISE_PS3 0x1801 +#define USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR 0x1803 #define USB_DEVICE_ID_DRAGONRISE_GAMECUBE 0x1843 #define USB_VENDOR_ID_DWAV 0x0eef diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 6fd49788fbe6..e9d6cc7cdfc5 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -83,6 +83,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, -- cgit v1.2.3 From 143fca77cce906d35f7a60ccef648e888df589f2 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 27 Dec 2016 08:57:59 -0800 Subject: HID: sensor-hub: Move the memset to sensor_hub_get_feature() While applying patch d443a0aa3a29: "HID: hid-sensor-hub: clear memory to avoid random data", there was some issues in applying correct version of the patch. This resulted in the breakage of sensor functions as all request like power-up will be reset by the memset() in the function sensor_hub_set_feature(). 
The reset of caller buffer should be in the function sensor_hub_get_feature(), not in the sensor_hub_set_feature(). Fixes: d443a0aa3a29 ("HID: hid-sensor-hub: clear memory to avoid random data") Cc: Stable # 4.9+ Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 5c925228847c..4ef73374a8f9 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -212,7 +212,6 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, __s32 value; int ret = 0; - memset(buffer, 0, buffer_size); mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield)) { @@ -256,6 +255,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, int buffer_index = 0; int i; + memset(buffer, 0, buffer_size); + mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield) || -- cgit v1.2.3 From 6c006a9d94bfb5cbcc5150e8fd7f45d3f92f3ee8 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Sun, 25 Dec 2016 19:01:03 +0530 Subject: clean_bdev_aliases: Prevent cleaning blocks that are not in block range The first block to be cleaned may start at a non-zero page offset. In such a scenario clean_bdev_aliases() will end up cleaning blocks that do not fall in the range of blocks to be cleaned. This commit fixes the issue by skipping blocks that do not fall in valid block range. 
Signed-off-by: Chandan Rajendra Reviewed-by: Jan Kara Reviewed-by: Eryu Guan Signed-off-by: Jens Axboe --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index d21771fcf7d3..0e87401cf335 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1660,7 +1660,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) head = page_buffers(page); bh = head; do { - if (!buffer_mapped(bh)) + if (!buffer_mapped(bh) || (bh->b_blocknr < block)) goto next; if (bh->b_blocknr >= block + len) break; -- cgit v1.2.3 From f2e0a0b292682dd94274d6793d76656b41526147 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jan 2017 09:46:15 -0700 Subject: block: Make wbt_wait() definition consistent with declaration Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism") Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-wbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 6e82769f4042..fd28c2806406 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -595,7 +595,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) * in an irq held spinlock, if it holds one when calling this function. * If we do sleep, we'll release and re-grab it. */ -unsigned int wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) +enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) { unsigned int ret = 0; -- cgit v1.2.3 From 9eca53508a157c6b6fdb6e06796902cf8a920d29 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 2 Jan 2017 09:48:47 -0700 Subject: block: Avoid that sparse complains about context imbalance in __wbt_wait() This patch does not change any functionality. 
Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism") Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-wbt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index fd28c2806406..f0a9c07b4c7a 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -544,6 +544,8 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, * the timer to kick off queuing again. */ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock) + __releases(lock) + __acquires(lock) { struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd()); DEFINE_WAIT(wait); @@ -558,13 +560,12 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock) if (may_queue(rwb, rqw, &wait, rw)) break; - if (lock) + if (lock) { spin_unlock_irq(lock); - - io_schedule(); - - if (lock) + io_schedule(); spin_lock_irq(lock); + } else + io_schedule(); } while (1); finish_wait(&rqw->wait, &wait); -- cgit v1.2.3 From b4a9eb4cd5966c8aad3d007d206a2cbda97d6928 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 2 Jan 2017 17:43:15 +0100 Subject: parisc: Add line-break when printing segfault info Add a leading line break else printed line gets too long. Signed-off-by: Helge Deller Cc: # v4.9 --- arch/parisc/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 8ff9253930af..1a0b4f63f0e9 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -234,7 +234,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, tsk->comm, code, address); print_vma_addr(KERN_CONT " in ", regs->iaoq[0]); - pr_cont(" trap #%lu: %s%c", code, trap_name(code), + pr_cont("\ntrap #%lu: %s%c", code, trap_name(code), vma ? 
',':'\n'); if (vma) -- cgit v1.2.3 From d0af683407a26a4437d8fa6e283ea201f2ae8146 Mon Sep 17 00:00:00 2001 From: Ian Kumlien Date: Mon, 2 Jan 2017 09:18:35 +0100 Subject: flow_dissector: Update pptp handling to avoid null pointer deref. __skb_flow_dissect can be called with a skb or a data packet, either can be NULL. All calls seems to have been moved to __skb_header_pointer except the pptp handling which is still calling skb_header_pointer. skb_header_pointer will use skb->data and thus: [ 109.556866] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [ 109.557102] IP: [] __skb_flow_dissect+0xa88/0xce0 [ 109.557263] PGD 0 [ 109.557338] [ 109.557484] Oops: 0000 [#1] SMP [ 109.557562] Modules linked in: chaoskey [ 109.557783] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.9.0 #79 [ 109.557867] Hardware name: Supermicro A1SRM-LN7F/LN5F/A1SRM-LN7F-2758, BIOS 1.0c 11/04/2015 [ 109.557957] task: ffff94085c27bc00 task.stack: ffffb745c0068000 [ 109.558041] RIP: 0010:[] [] __skb_flow_dissect+0xa88/0xce0 [ 109.558203] RSP: 0018:ffff94087fc83d40 EFLAGS: 00010206 [ 109.558286] RAX: 0000000000000130 RBX: ffffffff8975bf80 RCX: ffff94084fab6800 [ 109.558373] RDX: 0000000000000010 RSI: 000000000000000c RDI: 0000000000000000 [ 109.558460] RBP: 0000000000000b88 R08: 0000000000000000 R09: 0000000000000022 [ 109.558547] R10: 0000000000000008 R11: ffff94087fc83e04 R12: 0000000000000000 [ 109.558763] R13: ffff94084fab6800 R14: ffff94087fc83e04 R15: 000000000000002f [ 109.558979] FS: 0000000000000000(0000) GS:ffff94087fc80000(0000) knlGS:0000000000000000 [ 109.559326] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 109.559539] CR2: 0000000000000080 CR3: 0000000281809000 CR4: 00000000001026e0 [ 109.559753] Stack: [ 109.559957] 000000000000000c ffff94084fab6822 0000000000000001 ffff94085c2b5fc0 [ 109.560578] 0000000000000001 0000000000002000 0000000000000000 0000000000000000 [ 109.561200] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 
109.561820] Call Trace: [ 109.562027] [ 109.562108] [] ? eth_get_headlen+0x7a/0xf0 [ 109.562522] [] ? igb_poll+0x96a/0xe80 [ 109.562737] [] ? net_rx_action+0x20b/0x350 [ 109.562953] [] ? __do_softirq+0xe8/0x280 [ 109.563169] [] ? irq_exit+0xaa/0xb0 [ 109.563382] [] ? do_IRQ+0x4b/0xc0 [ 109.563597] [] ? common_interrupt+0x7f/0x7f [ 109.563810] [ 109.563890] [] ? cpuidle_enter_state+0x130/0x2c0 [ 109.564304] [] ? cpuidle_enter_state+0x120/0x2c0 [ 109.564520] [] ? cpu_startup_entry+0x19f/0x1f0 [ 109.564737] [] ? start_secondary+0x12a/0x140 [ 109.564950] Code: 83 e2 20 a8 80 0f 84 60 01 00 00 c7 04 24 08 00 00 00 66 85 d2 0f 84 be fe ff ff e9 69 fe ff ff 8b 34 24 89 f2 83 c2 04 66 85 c0 <41> 8b 84 24 80 00 00 00 0f 49 d6 41 8d 31 01 d6 41 2b 84 24 84 [ 109.569959] RIP [] __skb_flow_dissect+0xa88/0xce0 [ 109.570245] RSP [ 109.570453] CR2: 0000000000000080 Fixes: ab10dccb1160 ("rps: Inspect PPTP encapsulated by GRE to get flow hash") Signed-off-by: Ian Kumlien Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d6447dc10371..fe4e1531976c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -468,8 +468,9 @@ ip_proto_again: if (hdr->flags & GRE_ACK) offset += sizeof(((struct pptp_gre_header *)0)->ack); - ppp_hdr = skb_header_pointer(skb, nhoff + offset, - sizeof(_ppp_hdr), _ppp_hdr); + ppp_hdr = __skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), + data, hlen, _ppp_hdr); if (!ppp_hdr) goto out_bad; -- cgit v1.2.3 From 7ababb782690e03b78657e27bd051e20163af2d6 Mon Sep 17 00:00:00 2001 From: Michal Tesar Date: Mon, 2 Jan 2017 14:38:36 +0100 Subject: igmp: Make igmp group member RFC 3376 compliant 5.2. Action on Reception of a Query When a system receives a Query, it does not respond immediately. 
Instead, it delays its response by a random amount of time, bounded by the Max Resp Time value derived from the Max Resp Code in the received Query message. A system may receive a variety of Queries on different interfaces and of different kinds (e.g., General Queries, Group-Specific Queries, and Group-and-Source-Specific Queries), each of which may require its own delayed response. Before scheduling a response to a Query, the system must first consider previously scheduled pending responses and in many cases schedule a combined response. Therefore, the system must be able to maintain the following state: o A timer per interface for scheduling responses to General Queries. o A per-group and interface timer for scheduling responses to Group-Specific and Group-and-Source-Specific Queries. o A per-group and interface list of sources to be reported in the response to a Group-and-Source-Specific Query. When a new Query with the Router-Alert option arrives on an interface, provided the system has state to report, a delay for a response is randomly selected in the range (0, [Max Resp Time]) where Max Resp Time is derived from Max Resp Code in the received Query message. The following rules are then used to determine if a Report needs to be scheduled and the type of Report to schedule. The rules are considered in order and only the first matching rule is applied. 1. If there is a pending response to a previous General Query scheduled sooner than the selected delay, no additional response needs to be scheduled. 2. If the received Query is a General Query, the interface timer is used to schedule a response to the General Query after the selected delay. Any previously pending response to a General Query is canceled. --8<-- Currently the timer is rearmed with a new random expiration time for every incoming query regardless of a possibly already pending report, which is not aligned with the above RFC. 
It also might happen that higher rate of incoming queries can postpone the report after the expiration time of the first query causing group membership loss. Now the per interface general query timer is rearmed only when there is no pending report already scheduled on that interface or the newly selected expiration time is before the already pending scheduled report. Signed-off-by: Michal Tesar Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 68d622133f53..5b15459955f8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = prandom_u32() % in_dev->mr_maxdelay; + unsigned long exp = jiffies + tv + 2; + + if (in_dev->mr_gq_running && + time_after_eq(exp, (in_dev->mr_gq_timer).expires)) + return; in_dev->mr_gq_running = 1; - if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) + if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } -- cgit v1.2.3 From 4e5da369df64628358e25ffedcf80ac43af3793d Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Mon, 2 Jan 2017 18:52:24 +0100 Subject: Documentation/networking: fix typo in mpls-sysctl s/utliziation/utilization Signed-off-by: Alexander Alemayhu Signed-off-by: David S. Miller --- Documentation/networking/mpls-sysctl.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt index 9ed15f86c17c..15d8d16934fd 100644 --- a/Documentation/networking/mpls-sysctl.txt +++ b/Documentation/networking/mpls-sysctl.txt @@ -5,8 +5,8 @@ platform_labels - INTEGER possible to configure forwarding for label values equal to or greater than the number of platform labels. 
- A dense utliziation of the entries in the platform label table - is possible and expected aas the platform labels are locally + A dense utilization of the entries in the platform label table + is possible and expected as the platform labels are locally allocated. If the number of platform label table entries is set to 0 no -- cgit v1.2.3 From 8f87e626b059f1b82b017f53c5ee91fbc4486e36 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Jan 2017 12:56:02 +0100 Subject: net: stmmac: dwmac-oxnas: fix of-node leak Use the syscon lookup-by-phandle helper so that the reference taken by of_parse_phandle() is released when done with the node. Fixes: 5ed7414062e7 ("net: stmmac: Add OXNAS Glue Driver") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index c35597586121..fcc237e0aae1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -109,16 +109,9 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - struct device_node *sysctrl; struct oxnas_dwmac *dwmac; int ret; - sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0); - if (!sysctrl) { - dev_err(&pdev->dev, "failed to get sys-ctrl node\n"); - return -EINVAL; - } - ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) return ret; @@ -134,7 +127,8 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; - dwmac->regmap = syscon_node_to_regmap(sysctrl); + dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "oxsemi,sys-ctrl"); if (IS_ERR(dwmac->regmap)) { dev_err(&pdev->dev, "failed to have sysctrl regmap\n"); return 
PTR_ERR(dwmac->regmap); -- cgit v1.2.3 From 6b4c212b95ce6a586473a772fb2d28ab22a38f0e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Jan 2017 12:56:03 +0100 Subject: net: stmmac: dwmac-oxnas: fix fixed-link-phydev leaks Make sure to deregister and free any fixed-link phy registered during probe on probe errors and on driver unbind by calling the new glue helper function. For driver unbind, use the generic stmmac-platform remove implementation and add an exit callback to disable the clock. Fixes: 5ed7414062e7 ("net: stmmac: Add OXNAS Glue Driver") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 41 ++++++++++++++--------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index fcc237e0aae1..3efd110613df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -105,6 +105,13 @@ static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) return 0; } +static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct oxnas_dwmac *dwmac = priv; + + clk_disable_unprepare(dwmac->clk); +} + static int oxnas_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -121,40 +128,44 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) - return -ENOMEM; + if (!dwmac) { + ret = -ENOMEM; + goto err_remove_config_dt; + } dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; + plat_dat->exit = oxnas_dwmac_exit; dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl"); if (IS_ERR(dwmac->regmap)) { dev_err(&pdev->dev, "failed to have sysctrl regmap\n"); - return PTR_ERR(dwmac->regmap); + ret = PTR_ERR(dwmac->regmap); + goto err_remove_config_dt; } dwmac->clk 
= devm_clk_get(&pdev->dev, "gmac"); - if (IS_ERR(dwmac->clk)) - return PTR_ERR(dwmac->clk); + if (IS_ERR(dwmac->clk)) { + ret = PTR_ERR(dwmac->clk); + goto err_remove_config_dt; + } ret = oxnas_dwmac_init(dwmac); if (ret) - return ret; + goto err_remove_config_dt; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - clk_disable_unprepare(dwmac->clk); + goto err_dwmac_exit; - return ret; -} -static int oxnas_dwmac_remove(struct platform_device *pdev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - int ret = stmmac_dvr_remove(&pdev->dev); + return 0; - clk_disable_unprepare(dwmac->clk); +err_dwmac_exit: + oxnas_dwmac_exit(pdev, plat_dat->bsp_priv); +err_remove_config_dt: + stmmac_remove_config_dt(pdev, plat_dat); return ret; } @@ -197,7 +208,7 @@ MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); static struct platform_driver oxnas_dwmac_driver = { .probe = oxnas_dwmac_probe, - .remove = oxnas_dwmac_remove, + .remove = stmmac_pltfr_remove, .driver = { .name = "oxnas-dwmac", .pm = &oxnas_dwmac_pm_ops, -- cgit v1.2.3 From a8de4d719dfc12bc22192d7daef7c7ae6cfb8b80 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Jan 2017 12:56:04 +0100 Subject: net: stmmac: dwmac-oxnas: use generic pm implementation Now that we have an exit callback in place, add init as well and get rid of the custom PM callbacks in favour of the generic ones. Signed-off-by: Johan Hovold Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 38 +++-------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index 3efd110613df..3dc7d279f805 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -60,8 +60,9 @@ struct oxnas_dwmac { struct regmap *regmap; }; -static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) +static int oxnas_dwmac_init(struct platform_device *pdev, void *priv) { + struct oxnas_dwmac *dwmac = priv; unsigned int value; int ret; @@ -135,6 +136,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; + plat_dat->init = oxnas_dwmac_init; plat_dat->exit = oxnas_dwmac_exit; dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, @@ -151,7 +153,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } - ret = oxnas_dwmac_init(dwmac); + ret = oxnas_dwmac_init(pdev, plat_dat->bsp_priv); if (ret) goto err_remove_config_dt; @@ -170,36 +172,6 @@ err_remove_config_dt: return ret; } -#ifdef CONFIG_PM_SLEEP -static int oxnas_dwmac_suspend(struct device *dev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); - int ret; - - ret = stmmac_suspend(dev); - clk_disable_unprepare(dwmac->clk); - - return ret; -} - -static int oxnas_dwmac_resume(struct device *dev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); - int ret; - - ret = oxnas_dwmac_init(dwmac); - if (ret) - return ret; - - ret = stmmac_resume(dev); - - return ret; -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops, - oxnas_dwmac_suspend, oxnas_dwmac_resume); - static const struct of_device_id oxnas_dwmac_match[] = { { .compatible = "oxsemi,ox820-dwmac" }, { } @@ -211,7 +183,7 @@ static struct platform_driver oxnas_dwmac_driver = { .remove = 
stmmac_pltfr_remove, .driver = { .name = "oxnas-dwmac", - .pm = &oxnas_dwmac_pm_ops, + .pm = &stmmac_pltfr_pm_ops, .of_match_table = oxnas_dwmac_match, }, }; -- cgit v1.2.3 From 515028fe29d84a15f77d071a13b2d34eb3d137af Mon Sep 17 00:00:00 2001 From: Bartosz Folta Date: Mon, 2 Jan 2017 12:41:50 +0000 Subject: net: macb: Updated resource allocation function calls to new version of API. Changed function calls of resource allocation to new API. Changed way of setting DMA mask. Removed unnecessary sanity check. This patch is sent in regard to recently applied patch Commit 83a77e9ec4150ee4acc635638f7dedd9da523a26 net: macb: Added PCI wrapper for Platform Driver. Signed-off-by: Bartosz Folta Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_pci.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 92be2cd8f817..9906fda76087 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -1,5 +1,5 @@ /** - * macb_pci.c - Cadence GEM PCI wrapper. + * Cadence GEM PCI wrapper. 
* * Copyright (C) 2016 Cadence Design Systems - http://www.cadence.com * @@ -45,32 +45,27 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct macb_platform_data plat_data; struct resource res[2]; - /* sanity check */ - if (!id) - return -EINVAL; - /* enable pci device */ - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err < 0) { - dev_err(&pdev->dev, "Enabling PCI device has failed: 0x%04X", - err); - return -EACCES; + dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err); + return err; } pci_set_master(pdev); /* set up resources */ memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res)); - res[0].start = pdev->resource[0].start; - res[0].end = pdev->resource[0].end; + res[0].start = pci_resource_start(pdev, 0); + res[0].end = pci_resource_end(pdev, 0); res[0].name = PCI_DRIVER_NAME; res[0].flags = IORESOURCE_MEM; - res[1].start = pdev->irq; + res[1].start = pci_irq_vector(pdev, 0); res[1].name = PCI_DRIVER_NAME; res[1].flags = IORESOURCE_IRQ; - dev_info(&pdev->dev, "EMAC physical base addr = 0x%p\n", - (void *)(uintptr_t)pci_resource_start(pdev, 0)); + dev_info(&pdev->dev, "EMAC physical base addr: %pa\n", + &res[0].start); /* set up macb platform data */ memset(&plat_data, 0, sizeof(plat_data)); @@ -100,7 +95,7 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) plat_info.num_res = ARRAY_SIZE(res); plat_info.data = &plat_data; plat_info.size_data = sizeof(plat_data); - plat_info.dma_mask = DMA_BIT_MASK(32); + plat_info.dma_mask = pdev->dma_mask; /* register platform device */ plat_dev = platform_device_register_full(&plat_info); @@ -120,7 +115,6 @@ err_hclk_register: clk_unregister(plat_data.pclk); err_pclk_register: - pci_disable_device(pdev); return err; } @@ -130,7 +124,6 @@ static void macb_remove(struct pci_dev *pdev) struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); platform_device_unregister(plat_dev); - pci_disable_device(pdev); 
clk_unregister(plat_data->pclk); clk_unregister(plat_data->hclk); } -- cgit v1.2.3 From 5350d54f6cd12eaff623e890744c79b700bd3f17 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 2 Jan 2017 13:32:54 -0800 Subject: ipv4: Do not allow MAIN to be alias for new LOCAL w/ custom rules In the case of custom rules being present we need to handle the case of the LOCAL table being initialized after the new rule has been added. To address that I am adding a new check so that we can make certain we don't use an alias of MAIN for LOCAL when allocating a new table. Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") Reported-by: Oliver Brunel Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3ff8938893ec..eae0332b0e8c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - if (id == RT_TABLE_LOCAL) + if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); -- cgit v1.2.3 From 096de2f83ebc8e0404c5b7e847a4abd27b9739da Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 3 Jan 2017 16:26:04 +0100 Subject: benet: stricter vxlan offloading check in be_features_check When VXLAN offloading is enabled, be_features_check() tries to check if an encapsulated packet is indeed a VXLAN packet. The check is not strict enough, and considers any UDP-encapsulated ethernet frame with an 8-byte tunnel header as being VXLAN. Unfortunately, both GENEVE and VXLAN-GPE have an 8-byte header, so they get through this check. Force the UDP destination port to be the one that has been offloaded to hardware.
Without this, GENEVE-encapsulated packets can end up having an incorrect checksum when both a GENEVE and a VXLAN (offloaded) tunnel are configured. This is similar to commit a547224dceed ("mlx4e: Do not attempt to offload VXLAN ports that are unrecognized"). Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e1633bf5a22..225e9a4877d7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5155,7 +5155,9 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + sizeof(struct udphdr) + sizeof(struct vxlanhdr) || + !adapter->vxlan_port || + udp_hdr(skb)->dest != adapter->vxlan_port) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; -- cgit v1.2.3 From 3b48ab2248e61408910e792fe84d6ec466084c1a Mon Sep 17 00:00:00 2001 From: Reiter Wolfgang Date: Tue, 3 Jan 2017 01:39:10 +0100 Subject: drop_monitor: consider inserted data in genlmsg_end Final nlmsg_len field update must reflect inserted net_dm_drop_point data. This patch depends on previous patch: "drop_monitor: add missing call to genlmsg_end" Signed-off-by: Reiter Wolfgang Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/core/drop_monitor.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index f465bad2ef2c..fb55327dcfea 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -102,7 +102,6 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) } msg = nla_data(nla); memset(msg, 0, al); - genlmsg_end(skb, msg_header); goto out; err: @@ -112,6 +111,13 @@ out: swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); + if (skb) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); + + genlmsg_end(skb, genlmsg_data(gnlh)); + } + return skb; } -- cgit v1.2.3 From 9988f4d577f42f43b7612d755477585f35424af7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Dec 2016 12:59:31 -0800 Subject: latent_entropy: fix ARM build error on earlier gcc This fixes build errors seen on gcc-4.9.3 or gcc-5.3.1 for an ARM: arm-soc/init/initramfs.c: In function 'error': arm-soc/init/initramfs.c:50:1: error: unrecognizable insn: } ^ (insn 26 25 27 5 (set (reg:SI 111 [ local_entropy.243 ]) (rotatert:SI (reg:SI 116 [ local_entropy.243 ]) (const_int -30 [0xffffffffffffffe2]))) -1 (nil)) Patch from PaX Team Reported-by: Arnd Bergmann Reported-by: Brad Spengler Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- scripts/gcc-plugins/latent_entropy_plugin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 12541126575b..8ff203ad4809 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -328,9 +328,9 @@ static enum tree_code get_op(tree *rhs) op = LROTATE_EXPR; /* * This code limits the value of random_const to - * the size of a wide int for the rotation + * the size of a long for the rotation */ - random_const &= HOST_BITS_PER_WIDE_INT - 1; + 
random_const %= TYPE_PRECISION(long_unsigned_type_node); break; } -- cgit v1.2.3 From 81d873a87114b05dbb74d1fbf0c4322ba4bfdee4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Dec 2016 11:36:06 -0800 Subject: gcc-plugins: update gcc-common.h for gcc-7 This updates gcc-common.h from Emese Revfy for gcc 7. This fixes issues seen by Kugan and Arnd. Build tested with gcc 5.4 and 7 snapshot. Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- scripts/gcc-plugins/gcc-common.h | 85 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 950fd2e64bb7..12262c0cc691 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -39,6 +39,9 @@ #include "hash-map.h" #endif +#if BUILDING_GCC_VERSION >= 7000 +#include "memmodel.h" +#endif #include "emit-rtl.h" #include "debug.h" #include "target.h" @@ -91,6 +94,9 @@ #include "tree-ssa-alias.h" #include "tree-ssa.h" #include "stringpool.h" +#if BUILDING_GCC_VERSION >= 7000 +#include "tree-vrp.h" +#endif #include "tree-ssanames.h" #include "print-tree.h" #include "tree-eh.h" @@ -287,6 +293,22 @@ static inline struct cgraph_node *cgraph_next_function_with_gimple_body(struct c return NULL; } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + cgraph_node_ptr alias; + + if (callback(node, data)) + return true; + + for (alias = node->same_body; alias; alias = alias->next) { + if (include_overwritable || cgraph_function_body_availability(alias) > AVAIL_OVERWRITABLE) + if (cgraph_for_node_and_aliases(alias, callback, data, include_overwritable)) + return true; + } + + return false; +} + #define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \ for ((node) = cgraph_first_function_with_gimple_body(); (node); \ (node) = cgraph_next_function_with_gimple_body(node)) @@ -399,6 +421,7 @@ typedef union 
gimple_statement_d gassign; typedef union gimple_statement_d gcall; typedef union gimple_statement_d gcond; typedef union gimple_statement_d gdebug; +typedef union gimple_statement_d ggoto; typedef union gimple_statement_d gphi; typedef union gimple_statement_d greturn; @@ -452,6 +475,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return stmt; @@ -496,6 +529,14 @@ static inline const greturn *as_a_const_greturn(const_gimple stmt) typedef struct rtx_def rtx_insn; +static inline const char *get_decl_section_name(const_tree decl) +{ + if (DECL_SECTION_NAME(decl) == NULL_TREE) + return NULL; + + return TREE_STRING_POINTER(DECL_SECTION_NAME(decl)); +} + static inline void set_decl_section_name(tree node, const char *value) { if (value) @@ -511,6 +552,7 @@ typedef struct gimple_statement_base gassign; typedef struct gimple_statement_call gcall; typedef struct gimple_statement_base gcond; typedef struct gimple_statement_base gdebug; +typedef struct gimple_statement_base ggoto; typedef struct gimple_statement_phi gphi; typedef struct gimple_statement_base greturn; @@ -564,6 +606,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a(stmt); @@ -611,6 +663,11 @@ inline bool is_a_helper::test(const_gimple gs) #define INSN_DELETED_P(insn) (insn)->deleted() +static inline const char *get_decl_section_name(const_tree decl) +{ + return DECL_SECTION_NAME(decl); +} + /* symtab/cgraph related */ #define debug_cgraph_node(node) (node)->debug() #define cgraph_get_node(decl) cgraph_node::get(decl) @@ -619,6 
+676,7 @@ inline bool is_a_helper::test(const_gimple gs) #define cgraph_n_nodes symtab->cgraph_count #define cgraph_max_uid symtab->cgraph_max_uid #define varpool_get_node(decl) varpool_node::get(decl) +#define dump_varpool_node(file, node) (node)->dump(file) #define cgraph_create_edge(caller, callee, call_stmt, count, freq, nest) \ (caller)->create_edge((callee), (call_stmt), (count), (freq)) @@ -674,6 +732,11 @@ static inline cgraph_node_ptr cgraph_alias_target(cgraph_node_ptr node) return node->get_alias_target(); } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + return node->call_for_symbol_thunks_and_aliases(callback, data, include_overwritable); +} + static inline struct cgraph_node_hook_list *cgraph_add_function_insertion_hook(cgraph_node_hook hook, void *data) { return symtab->add_cgraph_insertion_hook(hook, data); @@ -729,6 +792,13 @@ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree l return gimple_build_assign(lhs, subcode, op1, op2 PASS_MEM_STAT); } +template <> +template <> +inline bool is_a_helper::test(const_gimple gs) +{ + return gs->code == GIMPLE_GOTO; +} + template <> template <> inline bool is_a_helper::test(const_gimple gs) @@ -766,6 +836,16 @@ static inline const gcall *as_a_const_gcall(const_gimple stmt) return as_a(stmt); } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return as_a(stmt); +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return as_a(stmt); +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a(stmt); @@ -828,4 +908,9 @@ static inline void debug_gimple_stmt(const_gimple s) #define debug_gimple_stmt(s) debug_gimple_stmt(CONST_CAST_GIMPLE(s)) #endif +#if BUILDING_GCC_VERSION >= 7000 +#define get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep, keep_aligning) \ + get_inner_reference(exp, pbitsize, pbitpos, poffset, 
pmode, punsignedp, preversep, pvolatilep) +#endif + #endif -- cgit v1.2.3 From 7158339d4c1ede786c48fa5c062fa68df366ba94 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 3 Jan 2017 17:51:33 -0500 Subject: block: fix up io_poll documentation /sys/block/<dev>/queue/io_poll is a boolean. Fix the docs. Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe --- Documentation/block/queue-sysfs.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index 51642159aedb..c0a3bb5a6e4e 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -54,9 +54,9 @@ This is the hardware sector size of the device, in bytes. io_poll (RW) ------------ -When read, this file shows the total number of block IO polls and how -many returned success. Writing '0' to this file will disable polling -for this device. Writing any non-zero value will enable this feature. +When read, this file shows whether polling is enabled (1) or disabled +(0). Writing '0' to this file will disable polling for this device. +Writing any non-zero value will enable this feature. io_poll_delay (RW) ------------------ -- cgit v1.2.3 From 926d93a33e59b2729afdbad357233c17184de9d2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Jan 2017 09:37:55 -0800 Subject: net: vrf: Add missing Rx counters The move from rx-handler to L3 receive handler inadvertently dropped the rx counters. Restore them. Fixes: 74b20582ac38 ("net: l3mdev: Add hook in ip and ipv6") Reported-by: Dinesh Dutt Signed-off-by: David Ahern Signed-off-by: David S.
Miller --- drivers/net/vrf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7532646c3b7b..23dfb0eac098 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -967,6 +967,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, */ need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); if (!ipv6_ndisc_frame(skb) && !need_strict) { + vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1011,6 +1012,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } + vrf_rx_stats(vrf_dev, skb->len); + skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); -- cgit v1.2.3 From 7a21272b088894070391a94fdd1c67014020fa1d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 3 Jan 2017 18:39:32 -0800 Subject: xfs: fix double-cleanup when CUI recovery fails Dan Carpenter reported a double-free of rcur if _defer_finish fails while we're recovering CUI items. Fix the error recovery to prevent this. Reported-by: Dan Carpenter Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_refcount_item.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index fe86a668a57e..6e4c7446c3d4 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -526,13 +526,14 @@ xfs_cui_recover( xfs_refcount_finish_one_cleanup(tp, rcur, error); error = xfs_defer_finish(&tp, &dfops, NULL); if (error) - goto abort_error; + goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); +abort_defer: xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; -- cgit v1.2.3 From 20e73b000bcded44a91b79429d8fa743247602ad Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 3 Jan 2017 18:39:33 -0800 Subject: xfs: use the actual AG length when reserving blocks We need to use the actual AG length when making per-AG reservations, since we could otherwise end up reserving more blocks out of the last AG than there are actual blocks. Complained-about-by: Brian Foster Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_ag_resv.c | 3 +++ fs/xfs/libxfs/xfs_refcount_btree.c | 9 ++++++--- fs/xfs/libxfs/xfs_refcount_btree.h | 3 ++- fs/xfs/libxfs/xfs_rmap_btree.c | 14 +++++++------- fs/xfs/libxfs/xfs_rmap_btree.h | 3 ++- fs/xfs/xfs_fsops.c | 14 ++++++++++++++ 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e5ebc3770460..d346d42c54d1 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -256,6 +256,9 @@ xfs_ag_resv_init( goto out; } + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= + pag->pagf_freeblks + pag->pagf_flcount); out: return error; } diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 6fb2215f8ff7..50add5272807 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -409,13 +409,14 @@ xfs_refcountbt_calc_size( */ xfs_extlen_t xfs_refcountbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. 
*/ if (mp->m_refc_mxr[0] == 0) return 0; - return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_refcountbt_calc_size(mp, agblocks); } /* @@ -430,22 +431,24 @@ xfs_refcountbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - *ask += xfs_refcountbt_max_size(mp); error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_buf_relse(agbp); + *ask += xfs_refcountbt_max_size(mp, agblocks); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 3be7768bd51a..9db008b955b7 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index de25771764ba..74e5a54bc428 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -550,13 +550,14 @@ xfs_rmapbt_calc_size( */ xfs_extlen_t xfs_rmapbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. 
*/ if (mp->m_rmap_mxr[0] == 0) return 0; - return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_rmapbt_calc_size(mp, agblocks); } /* @@ -571,25 +572,24 @@ xfs_rmapbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; - xfs_extlen_t pool_len; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - /* Reserve 1% of the AG or enough for 1 block per record. */ - pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp)); - *ask += pool_len; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_buf_relse(agbp); + /* Reserve 1% of the AG or enough for 1 block per record. */ + *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 2a9ac472fb15..19c08e933049 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 93d12fa2670d..242e8091296d 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -631,6 +631,20 @@ xfs_growfs_data_private( xfs_set_low_space_thresholds(mp); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + /* + * If we expanded the last AG, free the per-AG reservation + * so we can reinitialize it with the new size. 
+ */ + if (new) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + error = xfs_ag_resv_free(pag); + xfs_perag_put(pag); + if (error) + goto out; + } + /* Reserve AG metadata blocks. */ error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) -- cgit v1.2.3 From a1b7a4dea6166cf46be895bce4aac67ea5160fe8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jan 2017 18:39:33 -0800 Subject: xfs: fix crash and data corruption due to removal of busy COW extents There is a race window between write_cache_pages calling clear_page_dirty_for_io and XFS calling set_page_writeback, in which the mapping for an inode is tagged neither as dirty, nor as writeback. If the COW shrinker hits in exactly that window we'll remove the delayed COW extents and writepages trying to write it back, which in release kernels will manifest as corruption of the bmap btree, and in debug kernels will trip the ASSERT about now calling xfs_bmapi_write with the COWFORK flag for holes. A complex customer load manages to hit this window fairly reliably, probably by always having COW writeback in flight while the cow shrinker runs. This patch adds another check for having the I_DIRTY_PAGES flag set, which is still set during this race window. While this fixes the problem I'm still not overly happy about the way the COW shrinker works as it still seems a bit fragile. Signed-off-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_icache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index ff4d6311c7f4..70ca4f608321 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1597,7 +1597,8 @@ xfs_inode_free_cowblocks( * If the mapping is dirty or under writeback we cannot touch the * CoW fork. Leave it alone if we're in the midst of a directio. 
*/ - if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || + if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) return 0; -- cgit v1.2.3 From 721a0edfbe1f302b93274ce75e0d62843ca63e0d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 3 Jan 2017 18:39:34 -0800 Subject: xfs: update MAINTAINERS I am taking over as XFS maintainer from Dave Chinner[1], so update contact information and git tree pointers. [1] http://lkml.iu.edu/hypermail/linux/kernel/1612.1/04390.html Signed-off-by: Darrick J. Wong --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cfff2c9e3d94..793a016d9e4d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13527,11 +13527,11 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XFS FILESYSTEM -M: Dave Chinner +M: Darrick J. Wong M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org W: http://xfs.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git +T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git S: Supported F: Documentation/filesystems/xfs.txt F: fs/xfs/ -- cgit v1.2.3 From ff97f2399edac1e0fb3fa7851d5fbcbdf04717cf Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 3 Jan 2017 20:34:17 -0800 Subject: xfs: fix max_retries _show and _store functions max_retries _show and _store functions should test against cfg->max_retries, not cfg->retry_timeout Signed-off-by: Carlos Maiolino Reviewed-by: Eric Sandeen Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 276d3023d60f..de6195e38910 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -396,7 +396,7 @@ max_retries_show( int retries; struct xfs_error_cfg *cfg = to_error_cfg(kobject); - if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER) + if (cfg->max_retries == XFS_ERR_RETRY_FOREVER) retries = -1; else retries = cfg->max_retries; @@ -422,7 +422,7 @@ max_retries_store( return -EINVAL; if (val == -1) - cfg->retry_timeout = XFS_ERR_RETRY_FOREVER; + cfg->max_retries = XFS_ERR_RETRY_FOREVER; else cfg->max_retries = val; return count; -- cgit v1.2.3 From 04f6152d9fbad5bb78bccd05e798fa2d66c571e9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 4 Jan 2017 12:58:44 +0100 Subject: MAINTAINERS: add myself as maintainer of fbdev I would like to help with fbdev maintenance. I can dedicate some time for reviewing and handling patches but won't have time for much more. The subsystem will remain in maintenance mode (no new drivers will be added to it). Cc: Tomi Valkeinen Cc: Daniel Vetter Signed-off-by: Bartlomiej Zolnierkiewicz --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cfff2c9e3d94..2775b14f861a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5080,9 +5080,11 @@ F: drivers/net/wan/dlci.c F: drivers/net/wan/sdla.c FRAMEBUFFER LAYER +M: Bartlomiej Zolnierkiewicz L: linux-fbdev@vger.kernel.org +T: git git://github.com/bzolnier/linux.git Q: http://patchwork.kernel.org/project/linux-fbdev/list/ -S: Orphan +S: Maintained F: Documentation/fb/ F: drivers/video/ F: include/video/ -- cgit v1.2.3 From 4dcd19bfabaee8f9f4bcf203afba09b98ccbaf76 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 13 Dec 2016 13:50:52 +0530 Subject: video: fbdev: cobalt_lcdfb: Handle return NULL error from devm_ioremap Here, If devm_ioremap will fail. It will return NULL. 
The kernel can then run into a NULL-pointer dereference. This error check avoids the NULL-pointer dereference. Signed-off-by: Arvind Yadav Acked-by: Yoichi Yuasa Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/video/fbdev/cobalt_lcdfb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c index 2d3b691f3fc4..038ac6934fe9 100644 --- a/drivers/video/fbdev/cobalt_lcdfb.c +++ b/drivers/video/fbdev/cobalt_lcdfb.c @@ -308,6 +308,11 @@ static int cobalt_lcdfb_probe(struct platform_device *dev) info->screen_size = resource_size(res); info->screen_base = devm_ioremap(&dev->dev, res->start, info->screen_size); + if (!info->screen_base) { + framebuffer_release(info); + return -ENOMEM; + } + info->fbops = &cobalt_lcd_fbops; info->fix = cobalt_lcdfb_fix; info->fix.smem_start = res->start; -- cgit v1.2.3 From 63dfb0dac9055145db85ce764355aef2f563739a Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 3 Jan 2017 17:22:20 +0800 Subject: net: usb: asix_devices: add .reset_resume for USB PM The USB core may call reset_resume when it fails to resume the asix device. The USB core can recover from this abnormal resume at the low-level driver, and the asix driver's existing .resume handler works for this case too. Adding .reset_resume avoids disconnecting after coming back from system resume, so NFS can still be mounted after this commit. Signed-off-by: Peter Chen Signed-off-by: David S.
Miller --- drivers/net/usb/asix_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 6c646e228833..6e98ede997d3 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1367,6 +1367,7 @@ static struct usb_driver asix_driver = { .probe = usbnet_probe, .suspend = asix_suspend, .resume = asix_resume, + .reset_resume = asix_resume, .disconnect = usbnet_disconnect, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, -- cgit v1.2.3 From a9a8cdb368d99bb655b5cdabea560446db0527cc Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 3 Jan 2017 21:25:48 +0530 Subject: libcxgb: fix error check for ip6_route_output() ip6_route_output() never returns NULL so check dst->error instead of !dst. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index 0f0de5b63622..d04a6c163445 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -133,17 +133,15 @@ cxgb_find_route6(struct cxgb4_lld_info *lldi, if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) fl6.flowi6_oif = sin6_scope_id; dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgb_our_interface(lldi, get_real_dev, - ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + if (dst->error || + (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK))) { dst_release(dst); - dst = NULL; + return NULL; } } -out: return dst; } EXPORT_SYMBOL(cxgb_find_route6); -- cgit v1.2.3 From cd7aeb1f9706b665ad8659df8ff036e7bc0097f4 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 3 Jan 2017 13:57:00 -0500 
Subject: LiquidIO VF: s/select/imply/ for PTP_1588_CLOCK Fix a minor fallout from the merge of the timers and the networking trees. The following error may result if the PTP_1588_CLOCK prerequisites are not available: drivers/built-in.o: In function `ptp_clock_unregister': (.text+0x40e0a5): undefined reference to `pps_unregister_source' drivers/built-in.o: In function `ptp_clock_unregister': (.text+0x40e0cc): undefined reference to `posix_clock_unregister' drivers/built-in.o: In function `ptp_clock_event': (.text+0x40e249): undefined reference to `pps_event' drivers/built-in.o: In function `ptp_clock_register': (.text+0x40e5e1): undefined reference to `pps_register_source' drivers/built-in.o: In function `ptp_clock_register': (.text+0x40e62c): undefined reference to `posix_clock_register' drivers/built-in.o: In function `ptp_clock_register': (.text+0x40e68d): undefined reference to `pps_unregister_source' Signed-off-by: Nicolas Pitre Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index bbc8bd16cb97..dcbce6cac63e 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -77,7 +77,7 @@ config OCTEON_MGMT_ETHERNET config LIQUIDIO_VF tristate "Cavium LiquidIO VF support" depends on 64BIT && PCI_MSI - select PTP_1588_CLOCK + imply PTP_1588_CLOCK ---help--- This driver supports Cavium LiquidIO Intelligent Server Adapter based on CN23XX chips. -- cgit v1.2.3 From bb7da333d0a9f3bddc08f84187b7579a3f68fd24 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 3 Jan 2017 16:34:48 -0800 Subject: net: systemport: Utilize skb_put_padto() Since we need to pad our packets, utilize skb_put_padto() which increases skb->len by how much we need to pad, allowing us to eliminate the test on skb->len right below. 
Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 25d1eb4933d0..e67908b5edfe 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1028,13 +1028,12 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. */ - if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { + if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { ret = NETDEV_TX_OK; goto out; } - skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ? - ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len; + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); if (dma_mapping_error(kdev, mapping)) { -- cgit v1.2.3 From 38e5a85562a6cd911fc26d951d576551a688574c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 3 Jan 2017 16:34:49 -0800 Subject: net: systemport: Pad packet before inserting TSB Inserting the TSB means adding an extra 8 bytes in front of the packet that is going to be used as metadata information by the TDMA engine, but stripped off, so it does not really help with the packet padding. For some odd packet sizes that fall below the 60 bytes payload (e.g. ARP) we can end up padding them after the TSB insertion, thus making them 64 bytes, but with the TDMA stripping off the first 8 bytes, they could still be smaller than 64 bytes which is required to ingress the switch. Fix this by swapping the padding and TSB insertion, guaranteeing that the packets have the right sizes. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index e67908b5edfe..7e8cf213fd81 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1012,15 +1012,6 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } - /* Insert TSB and checksum infos */ - if (priv->tsb_en) { - skb = bcm_sysport_insert_tsb(skb, dev); - if (!skb) { - ret = NETDEV_TX_OK; - goto out; - } - } - /* The Ethernet switch we are interfaced with needs packets to be at * least 64 bytes (including FCS) otherwise they will be discarded when * they enter the switch port logic. When Broadcom tags are enabled, we @@ -1033,6 +1024,15 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } + /* Insert TSB and checksum infos */ + if (priv->tsb_en) { + skb = bcm_sysport_insert_tsb(skb, dev); + if (!skb) { + ret = NETDEV_TX_OK; + goto out; + } + } + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); -- cgit v1.2.3 From 3fe61f0940d9c7892462c893602fdccfe8b24e8c Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 4 Jan 2017 13:21:29 +0200 Subject: dpaa_eth: cleanup after init_phy() failure Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 624ba9058dc4..77517aa3e8d9 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2291,7 +2291,8 @@ static int dpaa_open(struct net_device *net_dev) net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev); if (!net_dev->phydev) { netif_err(priv, ifup, net_dev, "init_phy() failed\n"); - return -ENODEV; + err = -ENODEV; + goto phy_init_failed; } for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { @@ -2314,6 +2315,7 @@ mac_start_failed: for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) fman_port_disable(mac_dev->port[i]); +phy_init_failed: dpaa_eth_napi_disable(priv); return err; -- cgit v1.2.3 From 0fbb0f24dde8759925fc56e9dbc6a5b2cbba99c4 Mon Sep 17 00:00:00 2001 From: Roy Pledge Date: Wed, 4 Jan 2017 13:21:30 +0200 Subject: dpaa_eth: Initialize CGR structure before init The QBMan CGR options needs to be zeroed before calling the init function Signed-off-by: Roy Pledge Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 77517aa3e8d9..c9b7ad65e563 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -733,6 +733,7 @@ static int dpaa_eth_cgr_init(struct dpaa_priv *priv) priv->cgr_data.cgr.cb = dpaa_eth_cgscn; /* Enable Congestion State Change Notifications and CS taildrop */ + memset(&initcgr, 0, sizeof(initcgr)); initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES); initcgr.cgr.cscn_en = QM_CGR_EN; @@ -2422,6 +2423,7 @@ static int dpaa_ingress_cgr_init(struct dpaa_priv *priv) } /* Enable CS TD, but disable Congestion State Change Notifications. */ + memset(&initcgr, 0, sizeof(initcgr)); initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CS_THRES); initcgr.cgr.cscn_en = QM_CGR_EN; cs_th = DPAA_INGRESS_CS_THRESHOLD; -- cgit v1.2.3 From 4fdda95893de776a8efdf661bbf0e338f2f13dcb Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 4 Jan 2017 15:10:56 +0000 Subject: sfc: don't report RX hash keys to ethtool when RSS wasn't enabled If we failed to set up RSS on EF10 (e.g. because firmware declared RX_RSS_LIMITED), ethtool --show-nfc $dev rx-flow-hash ... should report no fields, rather than confusingly reporting what fields we _would_ be hashing on if RSS was working. Fixes: dcb4123cbec0 ("sfc: disable RSS when unsupported") Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/ef10.c | 3 ++- drivers/net/ethernet/sfc/ethtool.c | 2 ++ drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/siena.c | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index de2947ccc5ad..5eb0e684fd76 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1323,7 +1323,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx) } /* don't fail init if RSS setup doesn't work */ - efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + efx->rss_active = (rc == 0); return 0; } diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 87bdc56b4e3a..18ebaea44e82 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -975,6 +975,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, case ETHTOOL_GRXFH: { info->data = 0; + if (!efx->rss_active) /* No RSS */ + return 0; switch (info->flow_type) { case UDP_V4_FLOW: if (efx->rx_hash_udp_4tuple) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 1a635ced62d0..1c62c1a00fca 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -860,6 +860,7 @@ struct vfdi_status; * @rx_hash_key: Toeplitz hash key for RSS * @rx_indir_table: Indirection table for RSS * @rx_scatter: Scatter mode enabled for receives + * @rss_active: RSS enabled on hardware * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired @@ -998,6 +999,7 @@ struct efx_nic { u8 rx_hash_key[40]; u32 rx_indir_table[128]; bool rx_scatter; + bool rss_active; bool rx_hash_udp_4tuple; unsigned int_error_count; diff --git a/drivers/net/ethernet/sfc/siena.c 
b/drivers/net/ethernet/sfc/siena.c index a3901bc96586..4e54e5dc9fcb 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -403,6 +403,7 @@ static int siena_init_nic(struct efx_nic *efx) efx_writeo(efx, &temp, FR_AZ_RX_CFG); siena_rx_push_rss_config(efx, false, efx->rx_indir_table); + efx->rss_active = true; /* Enable event logging */ rc = efx_mcdi_log_ctrl(efx, true, false, 0); -- cgit v1.2.3 From 71eae1ca77fd6be218d8a952d97bba827e56516d Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 4 Jan 2017 23:10:23 +0300 Subject: sh_eth: enable RX descriptor word 0 shift on SH7734 The RX descriptor word 0 on SH7734 has the RFS[9:0] field in bits 16-25 (bits 0-15 usually used for that are occupied by the packet checksum). Thus we need to set the 'shift_rd0' field in the SH7734 SoC data... Fixes: f0e81fecd4f8 ("net: sh_eth: Add support SH7734") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 0af7fc279c85..00fafabab1d0 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -819,6 +819,7 @@ static struct sh_eth_cpu_data sh7734_data = { .tsu = 1, .hw_crc = 1, .select_mii = 1, + .shift_rd0 = 1, }; /* SH7763 */ -- cgit v1.2.3