From 607904c357c61adf20b8fd18af765e501d61a385 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 9 Jan 2017 10:26:52 -0500 Subject: locking/spinlocks: Remove the unused spin_lock_bh_nested() API The spin_lock_bh_nested() API is defined but is not used anywhere in the kernel. So all spin_lock_bh_nested() and related APIs are now removed. Signed-off-by: Waiman Long Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1483975612-16447-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 8 -------- include/linux/spinlock_api_smp.h | 2 -- include/linux/spinlock_api_up.h | 1 - 3 files changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 47dd0cebd204..59248dcc6ef3 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -180,8 +180,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock_nested(lock, subclass) -# define raw_spin_lock_bh_nested(lock, subclass) \ - _raw_spin_lock_bh_nested(lock, subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ @@ -197,7 +195,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) -# define raw_spin_lock_bh_nested(lock, subclass) _raw_spin_lock_bh(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -317,11 +314,6 @@ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ } while (0) -#define spin_lock_bh_nested(lock, subclass) \ -do { \ - raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\ -} while (0) - #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 5344268e6e62..42dfab89e740 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -22,8 +22,6 @@ int in_lock_functions(unsigned long addr); void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) __acquires(lock); -void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) - __acquires(lock); void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map) __acquires(lock); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d3afef9d8dbe..d0d188861ad6 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -57,7 +57,6 @@ #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) -#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) -- cgit v1.2.3 From a665ece8b471de45bc19af05d52a1eaa5bc06dca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2017 13:23:31 +0200 Subject: x86/platform/intel: Remove PMIC GPIO block support Moorestown support was removed by commit: 1a8359e411eb ("x86/mid: Remove Intel Moorestown") Remove this leftover. Signed-off-by: Andy Shevchenko Cc: Darren Hart Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: platform-driver-x86@vger.kernel.org Link: http://lkml.kernel.org/r/20170112112331.93236-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/sfi.c | 1 - drivers/platform/x86/Kconfig | 7 - drivers/platform/x86/Makefile | 1 - drivers/platform/x86/intel_pmic_gpio.c | 326 --------------------------------- include/linux/intel_pmic_gpio.h | 15 -- 5 files changed, 350 deletions(-) delete mode 100644 drivers/platform/x86/intel_pmic_gpio.c delete mode 100644 include/linux/intel_pmic_gpio.h (limited to 'include') diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index ce1303830231..19b43e3a9f0f 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5fe8be089b8b..fe5a3da3682f 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -816,13 +816,6 @@ config INTEL_SCU_IPC_UTIL low level access for debug work and updating the firmware. Say N unless you will be doing this on an Intel MID platform. -config GPIO_INTEL_PMIC - bool "Intel PMIC GPIO support" - depends on INTEL_SCU_IPC && GPIOLIB - ---help--- - Say Y here to support GPIO via the SCU IPC interface - on Intel MID platforms. - config INTEL_MID_POWER_BUTTON tristate "power button driver for Intel MID platforms" depends on INTEL_SCU_IPC && INPUT diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index d4111f0f8a78..b2f52a7690af 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -50,7 +50,6 @@ obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o obj-$(CONFIG_INTEL_SCU_IPC_UTIL) += intel_scu_ipcutil.o obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o obj-$(CONFIG_INTEL_IPS) += intel_ips.o -obj-$(CONFIG_GPIO_INTEL_PMIC) += intel_pmic_gpio.o obj-$(CONFIG_XO1_RFKILL) += xo1-rfkill.o obj-$(CONFIG_XO15_EBOOK) += xo15-ebook.o obj-$(CONFIG_IBM_RTL) += ibm_rtl.o diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c deleted file mode 100644 index 91ae58510d92..000000000000 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ /dev/null @@ -1,326 +0,0 @@ -/* Moorestown PMIC GPIO (access through IPC) driver - * Copyright (c) 2008 - 2009, Intel Corporation. - * - * Author: Alek Du - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* Supports: - * Moorestown platform PMIC chip - */ - -#define pr_fmt(fmt) "%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DRIVER_NAME "pmic_gpio" - -/* register offset that IPC driver should use - * 8 GPIO + 8 GPOSW (6 controllable) + 8GPO - */ -enum pmic_gpio_register { - GPIO0 = 0xE0, - GPIO7 = 0xE7, - GPIOINT = 0xE8, - GPOSWCTL0 = 0xEC, - GPOSWCTL5 = 0xF1, - GPO = 0xF4, -}; - -/* bits definition for GPIO & GPOSW */ -#define GPIO_DRV 0x01 -#define GPIO_DIR 0x02 -#define GPIO_DIN 0x04 -#define GPIO_DOU 0x08 -#define GPIO_INTCTL 0x30 -#define GPIO_DBC 0xc0 - -#define GPOSW_DRV 0x01 -#define GPOSW_DOU 0x08 -#define GPOSW_RDRV 0x30 - -#define GPIO_UPDATE_TYPE 0x80000000 - -#define NUM_GPIO 24 - -struct pmic_gpio { - struct mutex buslock; - struct gpio_chip chip; - void *gpiointr; - int irq; - unsigned irq_base; - unsigned int update_type; - u32 trigger_type; -}; - -static void pmic_program_irqtype(int gpio, int type) -{ - if (type & IRQ_TYPE_EDGE_RISING) - intel_scu_ipc_update_register(GPIO0 + gpio, 0x20, 0x20); - else - intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x20); - - if (type & IRQ_TYPE_EDGE_FALLING) - intel_scu_ipc_update_register(GPIO0 + gpio, 0x10, 0x10); - else - intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x10); -}; - -static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - if (offset >= 8) { - pr_err("only pin 0-7 support input\n"); - return -1;/* we only have 8 GPIO can use as input */ - } - return intel_scu_ipc_update_register(GPIO0 + offset, - GPIO_DIR, GPIO_DIR); -} - -static int pmic_gpio_direction_output(struct gpio_chip *chip, - unsigned offset, int value) -{ - int rc = 0; - - if (offset < 8)/* it is GPIO */ - rc = intel_scu_ipc_update_register(GPIO0 + offset, - GPIO_DRV | (value ? GPIO_DOU : 0), - GPIO_DRV | GPIO_DOU | GPIO_DIR); - else if (offset < 16)/* it is GPOSW */ - rc = intel_scu_ipc_update_register(GPOSWCTL0 + offset - 8, - GPOSW_DRV | (value ? GPOSW_DOU : 0), - GPOSW_DRV | GPOSW_DOU | GPOSW_RDRV); - else if (offset > 15 && offset < 24)/* it is GPO */ - rc = intel_scu_ipc_update_register(GPO, - value ? 1 << (offset - 16) : 0, - 1 << (offset - 16)); - else { - pr_err("invalid PMIC GPIO pin %d!\n", offset); - WARN_ON(1); - } - - return rc; -} - -static int pmic_gpio_get(struct gpio_chip *chip, unsigned offset) -{ - u8 r; - int ret; - - /* we only have 8 GPIO pins we can use as input */ - if (offset >= 8) - return -EOPNOTSUPP; - ret = intel_scu_ipc_ioread8(GPIO0 + offset, &r); - if (ret < 0) - return ret; - return r & GPIO_DIN; -} - -static void pmic_gpio_set(struct gpio_chip *chip, unsigned offset, int value) -{ - if (offset < 8)/* it is GPIO */ - intel_scu_ipc_update_register(GPIO0 + offset, - GPIO_DRV | (value ? GPIO_DOU : 0), - GPIO_DRV | GPIO_DOU); - else if (offset < 16)/* it is GPOSW */ - intel_scu_ipc_update_register(GPOSWCTL0 + offset - 8, - GPOSW_DRV | (value ? GPOSW_DOU : 0), - GPOSW_DRV | GPOSW_DOU | GPOSW_RDRV); - else if (offset > 15 && offset < 24) /* it is GPO */ - intel_scu_ipc_update_register(GPO, - value ? 1 << (offset - 16) : 0, - 1 << (offset - 16)); -} - -/* - * This is called from genirq with pg->buslock locked and - * irq_desc->lock held. We can not access the scu bus here, so we - * store the change and update in the bus_sync_unlock() function below - */ -static int pmic_irq_type(struct irq_data *data, unsigned type) -{ - struct pmic_gpio *pg = irq_data_get_irq_chip_data(data); - u32 gpio = data->irq - pg->irq_base; - - if (gpio >= pg->chip.ngpio) - return -EINVAL; - - pg->trigger_type = type; - pg->update_type = gpio | GPIO_UPDATE_TYPE; - return 0; -} - -static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset) -{ - struct pmic_gpio *pg = gpiochip_get_data(chip); - - return pg->irq_base + offset; -} - -static void pmic_bus_lock(struct irq_data *data) -{ - struct pmic_gpio *pg = irq_data_get_irq_chip_data(data); - - mutex_lock(&pg->buslock); -} - -static void pmic_bus_sync_unlock(struct irq_data *data) -{ - struct pmic_gpio *pg = irq_data_get_irq_chip_data(data); - - if (pg->update_type) { - unsigned int gpio = pg->update_type & ~GPIO_UPDATE_TYPE; - - pmic_program_irqtype(gpio, pg->trigger_type); - pg->update_type = 0; - } - mutex_unlock(&pg->buslock); -} - -/* the gpiointr register is read-clear, so just do nothing. */ -static void pmic_irq_unmask(struct irq_data *data) { } - -static void pmic_irq_mask(struct irq_data *data) { } - -static struct irq_chip pmic_irqchip = { - .name = "PMIC-GPIO", - .irq_mask = pmic_irq_mask, - .irq_unmask = pmic_irq_unmask, - .irq_set_type = pmic_irq_type, - .irq_bus_lock = pmic_bus_lock, - .irq_bus_sync_unlock = pmic_bus_sync_unlock, -}; - -static irqreturn_t pmic_irq_handler(int irq, void *data) -{ - struct pmic_gpio *pg = data; - u8 intsts = *((u8 *)pg->gpiointr + 4); - int gpio; - irqreturn_t ret = IRQ_NONE; - - for (gpio = 0; gpio < 8; gpio++) { - if (intsts & (1 << gpio)) { - pr_debug("pmic pin %d triggered\n", gpio); - generic_handle_irq(pg->irq_base + gpio); - ret = IRQ_HANDLED; - } - } - return ret; -} - -static int platform_pmic_gpio_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - int irq = platform_get_irq(pdev, 0); - struct intel_pmic_gpio_platform_data *pdata = dev->platform_data; - - struct pmic_gpio *pg; - int retval; - int i; - - if (irq < 0) { - dev_dbg(dev, "no IRQ line\n"); - return -EINVAL; - } - - if (!pdata || !pdata->gpio_base || !pdata->irq_base) { - dev_dbg(dev, "incorrect or missing platform data\n"); - return -EINVAL; - } - - pg = kzalloc(sizeof(*pg), GFP_KERNEL); - if (!pg) - return -ENOMEM; - - dev_set_drvdata(dev, pg); - - pg->irq = irq; - /* setting up SRAM mapping for GPIOINT register */ - pg->gpiointr = ioremap_nocache(pdata->gpiointr, 8); - if (!pg->gpiointr) { - pr_err("Can not map GPIOINT\n"); - retval = -EINVAL; - goto err2; - } - pg->irq_base = pdata->irq_base; - pg->chip.label = "intel_pmic"; - pg->chip.direction_input = pmic_gpio_direction_input; - pg->chip.direction_output = pmic_gpio_direction_output; - pg->chip.get = pmic_gpio_get; - pg->chip.set = pmic_gpio_set; - pg->chip.to_irq = pmic_gpio_to_irq; - pg->chip.base = pdata->gpio_base; - pg->chip.ngpio = NUM_GPIO; - pg->chip.can_sleep = 1; - pg->chip.parent = dev; - - mutex_init(&pg->buslock); - - pg->chip.parent = dev; - retval = gpiochip_add_data(&pg->chip, pg); - if (retval) { - pr_err("Can not add pmic gpio chip\n"); - goto err; - } - - retval = request_irq(pg->irq, pmic_irq_handler, 0, "pmic", pg); - if (retval) { - pr_warn("Interrupt request failed\n"); - goto fail_request_irq; - } - - for (i = 0; i < 8; i++) { - irq_set_chip_and_handler_name(i + pg->irq_base, - &pmic_irqchip, - handle_simple_irq, - "demux"); - irq_set_chip_data(i + pg->irq_base, pg); - } - return 0; - -fail_request_irq: - gpiochip_remove(&pg->chip); -err: - iounmap(pg->gpiointr); -err2: - kfree(pg); - return retval; -} - -/* at the same time, register a platform driver - * this supports the sfi 0.81 fw */ -static struct platform_driver platform_pmic_gpio_driver = { - .driver = { - .name = DRIVER_NAME, - }, - .probe = platform_pmic_gpio_probe, -}; - -static int __init platform_pmic_gpio_init(void) -{ - return platform_driver_register(&platform_pmic_gpio_driver); -} -subsys_initcall(platform_pmic_gpio_init); diff --git a/include/linux/intel_pmic_gpio.h b/include/linux/intel_pmic_gpio.h deleted file mode 100644 index 920109a29191..000000000000 --- a/include/linux/intel_pmic_gpio.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LINUX_INTEL_PMIC_H -#define LINUX_INTEL_PMIC_H - -struct intel_pmic_gpio_platform_data { - /* the first IRQ of the chip */ - unsigned irq_base; - /* number assigned to the first GPIO */ - unsigned gpio_base; - /* sram address for gpiointr register, the langwell chip will map - * the PMIC spi GPIO expander's GPIOINTR register in sram. - */ - unsigned gpiointr; -}; - -#endif -- cgit v1.2.3 From 5b485629ba0d5d027880769ff467c587b24b4bde Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 8 Jan 2017 23:58:09 +0900 Subject: kprobes, extable: Identify kprobes trampolines as kernel text area Improve __kernel_text_address()/kernel_text_address() to return true if the given address is on a kprobe's instruction slot trampoline. This can help stacktraces to determine the address is on a text area or not. To implement this atomically in is_kprobe_*_slot(), also change the insn_cache page list to an RCU list. This changes timings a bit (it delays page freeing to the RCU garbage collection phase), but none of that is in the hot path. Note: this change can add small overhead to stack unwinders because it adds 2 additional checks to __kernel_text_address(). However, the impact should be very small, because kprobe_insn_pages list has 1 entry per 256 probes(on x86, on arm/arm64 it will be 1024 probes), and kprobe_optinsn_pages has 1 entry per 32 probes(on x86). In most use cases, the number of kprobe events may be less than 20, which means that is_kprobe_*_slot() will check just one entry. Tested-by: Josh Poimboeuf Signed-off-by: Masami Hiramatsu Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Andrey Konovalov Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/148388747896.6869.6354262871751682264.stgit@devbox [ Improved the changelog and coding style. ] Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 30 +++++++++++++++++++- kernel/extable.c | 9 +++++- kernel/kprobes.c | 73 ++++++++++++++++++++++++++++++++++++------------- 3 files changed, 91 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 8f6849084248..16ddfb8b304a 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -278,9 +278,13 @@ struct kprobe_insn_cache { int nr_garbage; }; +#ifdef __ARCH_WANT_KPROBES_INSN_SLOT extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c); extern void __free_insn_slot(struct kprobe_insn_cache *c, kprobe_opcode_t *slot, int dirty); +/* sleep-less address checking routine */ +extern bool __is_insn_slot_addr(struct kprobe_insn_cache *c, + unsigned long addr); #define DEFINE_INSN_CACHE_OPS(__name) \ extern struct kprobe_insn_cache kprobe_##__name##_slots; \ @@ -294,6 +298,18 @@ static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\ { \ __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \ } \ + \ +static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ +{ \ + return __is_insn_slot_addr(&kprobe_##__name##_slots, addr); \ +} +#else /* __ARCH_WANT_KPROBES_INSN_SLOT */ +#define DEFINE_INSN_CACHE_OPS(__name) \ +static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ +{ \ + return 0; \ +} +#endif DEFINE_INSN_CACHE_OPS(insn); @@ -330,7 +346,6 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif - #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, @@ -481,6 +496,19 @@ static inline int enable_jprobe(struct jprobe *jp) return enable_kprobe(&jp->kp); } +#ifndef CONFIG_KPROBES +static inline bool is_kprobe_insn_slot(unsigned long addr) +{ + return false; +} +#endif +#ifndef CONFIG_OPTPROBES +static inline bool is_kprobe_optinsn_slot(unsigned long addr) +{ + return false; +} +#endif + #ifdef CONFIG_KPROBES /* * Blacklist ganerating macro. Specify functions which is not probed diff --git a/kernel/extable.c b/kernel/extable.c index e3beec4a2339..e1359474baa5 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -104,6 +105,8 @@ int __kernel_text_address(unsigned long addr) return 1; if (is_ftrace_trampoline(addr)) return 1; + if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) + return 1; /* * There might be init symbols in saved stacktraces. * Give those symbols a chance to be printed in @@ -123,7 +126,11 @@ int kernel_text_address(unsigned long addr) return 1; if (is_module_text_address(addr)) return 1; - return is_ftrace_trampoline(addr); + if (is_ftrace_trampoline(addr)) + return 1; + if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) + return 1; + return 0; } /* diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 43460104f119..ebb4dadca66b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -149,9 +149,11 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) struct kprobe_insn_page *kip; kprobe_opcode_t *slot = NULL; + /* Since the slot array is not protected by rcu, we need a mutex */ mutex_lock(&c->mutex); retry: - list_for_each_entry(kip, &c->pages, list) { + rcu_read_lock(); + list_for_each_entry_rcu(kip, &c->pages, list) { if (kip->nused < slots_per_page(c)) { int i; for (i = 0; i < slots_per_page(c); i++) { @@ -159,6 +161,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) kip->slot_used[i] = SLOT_USED; kip->nused++; slot = kip->insns + (i * c->insn_size); + rcu_read_unlock(); goto out; } } @@ -167,6 +170,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) WARN_ON(1); } } + rcu_read_unlock(); /* If there are any garbage slots, collect it and try again. */ if (c->nr_garbage && collect_garbage_slots(c) == 0) @@ -193,7 +197,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) kip->nused = 1; kip->ngarbage = 0; kip->cache = c; - list_add(&kip->list, &c->pages); + list_add_rcu(&kip->list, &c->pages); slot = kip->insns; out: mutex_unlock(&c->mutex); @@ -213,7 +217,8 @@ static int collect_one_slot(struct kprobe_insn_page *kip, int idx) * next time somebody inserts a probe. */ if (!list_is_singular(&kip->list)) { - list_del(&kip->list); + list_del_rcu(&kip->list); + synchronize_rcu(); kip->cache->free(kip->insns); kfree(kip); } @@ -235,8 +240,7 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c) continue; kip->ngarbage = 0; /* we will collect all garbages */ for (i = 0; i < slots_per_page(c); i++) { - if (kip->slot_used[i] == SLOT_DIRTY && - collect_one_slot(kip, i)) + if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i)) break; } } @@ -248,29 +252,60 @@ void __free_insn_slot(struct kprobe_insn_cache *c, kprobe_opcode_t *slot, int dirty) { struct kprobe_insn_page *kip; + long idx; mutex_lock(&c->mutex); - list_for_each_entry(kip, &c->pages, list) { - long idx = ((long)slot - (long)kip->insns) / - (c->insn_size * sizeof(kprobe_opcode_t)); - if (idx >= 0 && idx < slots_per_page(c)) { - WARN_ON(kip->slot_used[idx] != SLOT_USED); - if (dirty) { - kip->slot_used[idx] = SLOT_DIRTY; - kip->ngarbage++; - if (++c->nr_garbage > slots_per_page(c)) - collect_garbage_slots(c); - } else - collect_one_slot(kip, idx); + rcu_read_lock(); + list_for_each_entry_rcu(kip, &c->pages, list) { + idx = ((long)slot - (long)kip->insns) / + (c->insn_size * sizeof(kprobe_opcode_t)); + if (idx >= 0 && idx < slots_per_page(c)) goto out; - } } - /* Could not free this slot. */ + /* Could not find this slot. */ WARN_ON(1); + kip = NULL; out: + rcu_read_unlock(); + /* Mark and sweep: this may sleep */ + if (kip) { + /* Check double free */ + WARN_ON(kip->slot_used[idx] != SLOT_USED); + if (dirty) { + kip->slot_used[idx] = SLOT_DIRTY; + kip->ngarbage++; + if (++c->nr_garbage > slots_per_page(c)) + collect_garbage_slots(c); + } else { + collect_one_slot(kip, idx); + } + } mutex_unlock(&c->mutex); } +/* + * Check given address is on the page of kprobe instruction slots. + * This will be used for checking whether the address on a stack + * is on a text area or not. + */ +bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr) +{ + struct kprobe_insn_page *kip; + bool ret = false; + + rcu_read_lock(); + list_for_each_entry_rcu(kip, &c->pages, list) { + if (addr >= (unsigned long)kip->insns && + addr < (unsigned long)kip->insns + PAGE_SIZE) { + ret = true; + break; + } + } + rcu_read_unlock(); + + return ret; +} + #ifdef CONFIG_OPTPROBES /* For optimized_kprobe buffer */ struct kprobe_insn_cache kprobe_optinsn_slots = { -- cgit v1.2.3 From c31cc6a5187e8b09ccee34f81728a90f80e872e7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Jan 2017 18:11:43 +0100 Subject: sched/cputime: Allow accounting system time using cpustat index In order to prepare for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y to delay cputime accounting to the tick, let's provide APIs to account system time to precise contexts: hardirq, softirq, pure system, ... Inspired-by: Martin Schwidefsky Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1483636310-6557-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 ++ kernel/sched/cputime.c | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 00f776816aa3..14b63bb714d4 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -80,6 +80,8 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) extern void account_user_time(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); +extern void account_system_index_time(struct task_struct *, cputime_t, + enum cpu_usage_stat); extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 7700a9cba335..aad42835938c 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -176,8 +176,8 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime) * @cputime: the cpu time spent in kernel space since the last update * @index: pointer to cpustat field that has to be updated */ -static inline -void __account_system_time(struct task_struct *p, cputime_t cputime, int index) +void account_system_index_time(struct task_struct *p, + cputime_t cputime, enum cpu_usage_stat index) { /* Add system time to process. */ p->stime += cputime; @@ -213,7 +213,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, else index = CPUTIME_SYSTEM; - __account_system_time(p, cputime, index); + account_system_index_time(p, cputime, index); } /* @@ -400,7 +400,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, * So, we have to handle it separately here. * Also, p->stime needs to be updated for ksoftirqd. */ - __account_system_time(p, cputime, CPUTIME_SOFTIRQ); + account_system_index_time(p, cputime, CPUTIME_SOFTIRQ); } else if (user_tick) { account_user_time(p, cputime); } else if (p == rq->idle) { @@ -408,7 +408,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, } else if (p->flags & PF_VCPU) { /* System time or guest time */ account_guest_time(p, cputime); } else { - __account_system_time(p, cputime, CPUTIME_SYSTEM); + account_system_index_time(p, cputime, CPUTIME_SYSTEM); } } -- cgit v1.2.3 From 1213699ab426608ff1925ab263dd6925102bb92a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Jan 2017 18:11:44 +0100 Subject: sched/cputime: Export account_guest_time() In order to prepare for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y to delay cputime accounting to the tick, let's allow archs to account cputime directly to gtime. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1483636310-6557-5-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 1 + kernel/sched/cputime.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 14b63bb714d4..cfd6c0c6d4e8 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -79,6 +79,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) } extern void account_user_time(struct task_struct *, cputime_t); +extern void account_guest_time(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); extern void account_system_index_time(struct task_struct *, cputime_t, enum cpu_usage_stat); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index aad42835938c..5813ee4a5168 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -151,7 +151,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime) * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in virtual machine since the last update */ -static void account_guest_time(struct task_struct *p, cputime_t cputime) +void account_guest_time(struct task_struct *p, cputime_t cputime) { u64 *cpustat = kcpustat_this_cpu->cpustat; -- cgit v1.2.3 From c8d7dabf8f91fadd265e6eb87afb201d14ea299b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Jan 2017 18:11:50 +0100 Subject: sched/cputime: Rename vtime_account_user() to vtime_flush() CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y used to accumulate user time and account it on ticks and context switches only through the vtime_account_user() function. Now this model has been generalized on the 3 archs for all kind of cputime (system, irq, ...) and all the cputime flushing happens under vtime_account_user(). So let's rename this function to better reflect its new role. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Acked-by: Martin Schwidefsky Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1483636310-6557-11-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- arch/ia64/kernel/time.c | 2 +- arch/powerpc/kernel/time.c | 6 ++---- arch/s390/kernel/vtime.c | 2 +- include/linux/kernel_stat.h | 2 +- include/linux/vtime.h | 7 +++++-- kernel/sched/cputime.c | 4 +--- 6 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 37f1b315d9b6..d040f12ea9f9 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -61,7 +61,7 @@ static struct clocksource *itc_clocksource; extern cputime_t cycle_to_cputime(u64 cyc); -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); cputime_t delta; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 4255e6930ac1..02e97305d22b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -382,15 +382,13 @@ void vtime_account_idle(struct task_struct *tsk) } /* - * Transfer the user time accumulated in the paca - * by the exception entry and exit code to the generic - * process user time records. + * Account the whole cputime accumulated in the paca * Must be called with interrupts disabled. * Assumes that vtime_account_system/idle() has been called * recently (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { struct cpu_accounting_data *acct = get_accounting(tsk); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 1a53e0bdc90a..0a9e5d67547d 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -214,7 +214,7 @@ void vtime_task_switch(struct task_struct *prev) * accounting system time in order to correctly compute * the stolen time accounting. */ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { if (do_account_vtime(tsk)) virt_timer_expire(); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index cfd6c0c6d4e8..c3e38ded2d73 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -89,7 +89,7 @@ extern void account_idle_time(cputime_t); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) { - vtime_account_user(tsk); + vtime_flush(tsk); } #else extern void account_process_tick(struct task_struct *, int user); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index aa9bfea8804a..0681fe25abeb 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -58,27 +58,28 @@ static inline void vtime_task_switch(struct task_struct *prev) extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); -extern void vtime_account_user(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } -static inline void vtime_account_user(struct task_struct *tsk) { } #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_account_user(struct task_struct *tsk); extern void vtime_user_enter(struct task_struct *tsk); static inline void vtime_user_exit(struct task_struct *tsk) { vtime_account_user(tsk); } + extern void vtime_guest_enter(struct task_struct *tsk); extern void vtime_guest_exit(struct task_struct *tsk); extern void vtime_init_idle(struct task_struct *tsk, int cpu); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ +static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_user_enter(struct task_struct *tsk) { } static inline void vtime_user_exit(struct task_struct *tsk) { } static inline void vtime_guest_enter(struct task_struct *tsk) { } @@ -93,9 +94,11 @@ static inline void vtime_account_irq_exit(struct task_struct *tsk) /* On hard|softirq exit we always account to hard|softirq cputime */ vtime_account_system(tsk); } +extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static inline void vtime_account_irq_enter(struct task_struct *tsk) { } static inline void vtime_account_irq_exit(struct task_struct *tsk) { } +static inline void vtime_flush(struct task_struct *tsk) { } #endif diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5813ee4a5168..f7c14cc71d06 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -437,9 +437,7 @@ void vtime_common_task_switch(struct task_struct *prev) else vtime_account_system(prev); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - vtime_account_user(prev); -#endif + vtime_flush(prev); arch_vtime_task_switch(prev); } #endif -- cgit v1.2.3 From 642fa448ae6b3a4e5e8737054a094173405b7643 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 3 Jan 2017 13:43:14 -0800 Subject: sched/core: Remove set_task_state() This is a nasty interface and setting the state of a foreign task must not be done. As of the following commit: be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()") ... everyone in the kernel calls set_task_state() with current, allowing the helper to be removed. However, as the comment indicates, it is still around for those archs where computing current is more expensive than using a pointer, at least in theory. An important arch that is affected is arm64, however this has been addressed now [1] and performance is up to par making no difference with either calls. Of all the callers, if any, it's the locking bits that would care most about this -- ie: we end up passing a tsk pointer to a lot of the lock slowpath, and setting ->state on that. The following numbers are based on two tests: a custom ad-hoc microbenchmark that just measures latencies (for ~65 million calls) between get_task_state() vs get_current_state(). Secondly for a higher overview, an unlink microbenchmark was used, which pounds on a single file with open, close,unlink combos with increasing thread counts (up to 4x ncpus). While the workload is quite unrealistic, it does contend a lot on the inode mutex or now rwsem. [1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com == 1. x86-64 == Avg runtime set_task_state(): 601 msecs Avg runtime set_current_state(): 552 msecs vanilla dirty Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%) Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%) Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%) Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%) Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%) Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%) Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%) Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%) Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%) Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%) Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%) Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%) Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%) == 2. ppc64le == Avg runtime set_task_state(): 938 msecs Avg runtime set_current_state: 940 msecs vanilla dirty Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%) Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%) Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%) Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%) Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%) Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%) Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%) Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%) Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%) Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%) Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%) Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%) Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%) Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%) Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%) Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%) x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching) and ppc64 (with paca) show similar improvements in the unlink microbenches. The small delta for ppc64 (2ms), does not represent the gains on the unlink runs. In the case of x86, there was a decent amount of variation in the latency runs, but always within a 20 to 50ms increase), ppc was more constant. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: mark.rutland@arm.com Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- arch/um/drivers/random.c | 2 +- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-crypt.c | 4 ++-- drivers/md/persistent-data/dm-block-manager.c | 4 ++-- .../staging/lustre/lnet/libcfs/linux/linux-debug.c | 2 +- drivers/tty/tty_ldsem.c | 10 ++++---- include/linux/sched.h | 27 +--------------------- kernel/exit.c | 4 ++-- kernel/locking/mutex.c | 8 +++---- kernel/locking/rwsem-spinlock.c | 8 +++---- kernel/locking/rwsem-xadd.c | 4 ++-- kernel/locking/semaphore.c | 2 +- 12 files changed, 26 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 05523f14d7b2..57f03050c850 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -76,7 +76,7 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, add_sigio_fd(random_fd); add_wait_queue(&host_read_wait, &wait); - set_task_state(current, TASK_INTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(&host_read_wait, &wait); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 84d2f0e4c754..d36d427a9efb 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -794,7 +794,7 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c) DECLARE_WAITQUEUE(wait, current); add_wait_queue(&c->free_buffer_wait, &wait); - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); dm_bufio_unlock(c); io_schedule(); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 7c6c57216bf2..96692d13a6e4 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1210,14 +1210,14 @@ continue_locked: spin_unlock_irq(&cc->write_thread_wait.lock); if (unlikely(kthread_should_stop())) { - set_task_state(current, TASK_RUNNING); + set_current_state(TASK_RUNNING); remove_wait_queue(&cc->write_thread_wait, &wait); break; } schedule(); - set_task_state(current, TASK_RUNNING); + set_current_state(TASK_RUNNING); spin_lock_irq(&cc->write_thread_wait.lock); __remove_wait_queue(&cc->write_thread_wait, &wait); goto continue_locked; diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a6dde7cab458..758d90cc2733 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -120,7 +120,7 @@ static int __check_holder(struct block_lock *lock) static void __wait(struct waiter *w) { for (;;) { - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!w->task) break; @@ -128,7 +128,7 @@ static void __wait(struct waiter *w) schedule(); } - set_task_state(current, TASK_RUNNING); + set_current_state(TASK_RUNNING); } static void __wake_waiter(struct waiter *w) diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c index 39a72e3f0c18..7035356e56b3 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c @@ -107,7 +107,7 @@ void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) libcfs_debug_dumplog(); if (libcfs_panic_on_lbug) panic("LBUG"); - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); while (1) schedule(); } diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 3e6722954f29..9229de43e19d 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -231,7 +231,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout) /* wait to be given the lock */ for (;;) { - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!waiter.task) break; @@ -240,7 +240,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout) timeout = schedule_timeout(timeout); } - __set_task_state(current, TASK_RUNNING); + __set_current_state(TASK_RUNNING); if (!timeout) { /* lock timed out but check if this task was just @@ -289,14 +289,14 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout) waiter.task = current; - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); for (;;) { if (!timeout) break; raw_spin_unlock_irq(&sem->wait_lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->wait_lock); - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); locked = writer_trylock(sem); if (locked) break; @@ -307,7 +307,7 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout) list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); - __set_task_state(current, TASK_RUNNING); + __set_current_state(TASK_RUNNING); /* lock wait may have timed out */ if (!locked) diff --git a/include/linux/sched.h b/include/linux/sched.h index ad3ec9ec61f7..f4f9d32f12b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -227,7 +227,7 @@ extern void proc_sched_set_task(struct task_struct *p); extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; -/* Convenience macros for the sake of set_task_state */ +/* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) @@ -254,17 +254,6 @@ extern char ___assert_task_state[1 - 2*!!( #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -#define __set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - (tsk)->state = (state_value); \ - } while (0) -#define set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - smp_store_mb((tsk)->state, (state_value)); \ - } while (0) - #define __set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ @@ -277,20 +266,6 @@ extern char ___assert_task_state[1 - 2*!!( } while (0) #else - -/* - * @tsk had better be current, or you get to keep the pieces. - * - * The only reason is that computing current can be more expensive than - * using a pointer that's already available. - * - * Therefore, see set_current_state(). - */ -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - smp_store_mb((tsk)->state, (state_value)) - /* * set_current_state() includes a barrier so that the write of current->state * is correctly serialised wrt the caller's subsequent test of whether to diff --git a/kernel/exit.c b/kernel/exit.c index 2385d434a46e..27c68653e2fc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -501,12 +501,12 @@ static void exit_mm(void) complete(&core_state->startup); for (;;) { - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!self.task) /* see coredump_finish() */ break; freezable_schedule(); } - __set_task_state(current, TASK_RUNNING); + __set_current_state(TASK_RUNNING); down_read(&mm->mmap_sem); } atomic_inc(&mm->mm_count); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 4c7d04362c95..97d142486f93 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -666,7 +666,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, lock_contended(&lock->dep_map, ip); - set_task_state(current, state); + set_current_state(state); for (;;) { /* * Once we hold wait_lock, we're serialized against @@ -701,7 +701,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); } - set_task_state(current, state); + set_current_state(state); /* * Here we order against unlock; we must either see it change * state back to RUNNING and fall through the next schedule(), @@ -715,7 +715,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } spin_lock_mutex(&lock->wait_lock, flags); acquired: - __set_task_state(current, TASK_RUNNING); + __set_current_state(TASK_RUNNING); mutex_remove_waiter(lock, &waiter, current); if (likely(list_empty(&lock->wait_list))) @@ -735,7 +735,7 @@ skip_wait: return 0; err: - __set_task_state(current, TASK_RUNNING); + __set_current_state(TASK_RUNNING); mutex_remove_waiter(lock, &waiter, current); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 60d15d3bb2a8..5eacab880f67 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -139,7 +139,7 @@ void __sched __down_read(struct rw_semaphore *sem) goto out; } - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); /* set up my own style of waitqueue */ waiter.task = current; @@ -156,10 +156,10 @@ void __sched __down_read(struct rw_semaphore *sem) if (!waiter.task) break; schedule(); - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); } - __set_task_state(current, TASK_RUNNING); + __set_current_state(TASK_RUNNING); out: ; } @@ -216,7 +216,7 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) ret = -EINTR; goto out; } - set_task_state(current, state); + set_current_state(state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); raw_spin_lock_irqsave(&sem->wait_lock, flags); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index d3b819c7f07f..a3a381aee24b 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -253,13 +253,13 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* wait to be given the lock */ while (true) { - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!waiter.task) break; schedule(); } - __set_task_state(current, TASK_RUNNING); + __set_current_state(TASK_RUNNING); return sem; } EXPORT_SYMBOL(rwsem_down_read_failed); diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 29aac9f9e5e4..9512e37637dc 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -215,7 +215,7 @@ static inline int __sched __down_common(struct semaphore *sem, long state, goto interrupted; if (unlikely(timeout <= 0)) goto timed_out; - __set_task_state(current, state); + __set_current_state(state); raw_spin_unlock_irq(&sem->lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->lock); -- cgit v1.2.3 From 8f95c90ceb541a38ac16fec48c05142ef1450c25 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Jan 2017 07:22:25 -0800 Subject: sched/wait, RCU: Introduce rcuwait machinery rcuwait provides support for (single) RCU-safe task wait/wake functionality, with the caveat that it must not be called after exit_notify(), such that we avoid racing with rcu delayed_put_task_struct callbacks, task_struct being rcu unaware in this context -- for which we similarly have task_rcu_dereference() magic, but with different return semantics, which can conflict with the wakeup side. The interfaces are quite straightforward: rcuwait_wait_event() rcuwait_wake_up() More details are in the comments, but it's perhaps worth mentioning at least, that users must provide proper serialization when waiting on a condition, and avoid corrupting a concurrent waiter. Also care must be taken between the task and the condition for when calling the wakeup -- we cannot miss wakeups. When porting users, this is for example, a given when using waitqueues in that everything is done under the q->lock. As such, it can remove sources of non preemptable unbounded work for realtime. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1484148146-14210-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/rcuwait.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/exit.c | 30 +++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 include/linux/rcuwait.h (limited to 'include') diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h new file mode 100644 index 000000000000..0e93d56c7ab2 --- /dev/null +++ b/include/linux/rcuwait.h @@ -0,0 +1,63 @@ +#ifndef _LINUX_RCUWAIT_H_ +#define _LINUX_RCUWAIT_H_ + +#include + +/* + * rcuwait provides a way of blocking and waking up a single + * task in an rcu-safe manner; where it is forbidden to use + * after exit_notify(). task_struct is not properly rcu protected, + * unless dealing with rcu-aware lists, ie: find_task_by_*(). + * + * Alternatively we have task_rcu_dereference(), but the return + * semantics have different implications which would break the + * wakeup side. The only time @task is non-nil is when a user is + * blocked (or checking if it needs to) on a condition, and reset + * as soon as we know that the condition has succeeded and are + * awoken. + */ +struct rcuwait { + struct task_struct *task; +}; + +#define __RCUWAIT_INITIALIZER(name) \ + { .task = NULL, } + +static inline void rcuwait_init(struct rcuwait *w) +{ + w->task = NULL; +} + +extern void rcuwait_wake_up(struct rcuwait *w); + +/* + * The caller is responsible for locking around rcuwait_wait_event(), + * such that writes to @task are properly serialized. + */ +#define rcuwait_wait_event(w, condition) \ +({ \ + /* \ + * Complain if we are called after do_exit()/exit_notify(), \ + * as we cannot rely on the rcu critical region for the \ + * wakeup side. \ + */ \ + WARN_ON(current->exit_state); \ + \ + rcu_assign_pointer((w)->task, current); \ + for (;;) { \ + /* \ + * Implicit barrier (A) pairs with (B) in \ + * rcuwait_trywake(). \ + */ \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + \ + schedule(); \ + } \ + \ + WRITE_ONCE((w)->task, NULL); \ + __set_current_state(TASK_RUNNING); \ +}) + +#endif /* _LINUX_RCUWAIT_H_ */ diff --git a/kernel/exit.c b/kernel/exit.c index 27c68653e2fc..a9441da69e29 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -282,6 +283,35 @@ retry: return task; } +void rcuwait_wake_up(struct rcuwait *w) +{ + struct task_struct *task; + + rcu_read_lock(); + + /* + * Order condition vs @task, such that everything prior to the load + * of @task is visible. This is the condition as to why the user called + * rcuwait_trywake() in the first place. Pairs with set_current_state() + * barrier (A) in rcuwait_wait_event(). + * + * WAIT WAKE + * [S] tsk = current [S] cond = true + * MB (A) MB (B) + * [L] cond [L] tsk + */ + smp_rmb(); /* (B) */ + + /* + * Avoid using task_rcu_dereference() magic as long as we are careful, + * see comment in rcuwait_wait_event() regarding ->exit_state. + */ + task = rcu_dereference(w->task); + if (task) + wake_up_process(task); + rcu_read_unlock(); +} + struct task_struct *try_get_task_struct(struct task_struct **ptask) { struct task_struct *task; -- cgit v1.2.3 From 52b94129f274937e4c25dd17b76697664a3c43c9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Jan 2017 07:22:26 -0800 Subject: locking/percpu-rwsem: Replace waitqueue with rcuwait The use of any kind of wait queue is an overkill for pcpu-rwsems. While one option would be to use the less heavy simple (swait) flavor, this is still too much for what pcpu-rwsems needs. For one, we do not care about any sort of queuing in that the only (rare) time writers (and readers, for that matter) are queued is when trying to acquire the regular contended rw_sem. There cannot be any further queuing as writers are serialized by the rw_sem in the first place. Given that percpu_down_write() must not be called after exit_notify(), we can replace the bulky waitqueue with rcuwait such that a writer can wait for its turn to take the lock. As such, we can avoid the queue handling and locking overhead. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1484148146-14210-3-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/percpu-rwsem.h | 8 ++++---- kernel/locking/percpu-rwsem.c | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5b2e6159b744..93664f022ecf 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -4,15 +4,15 @@ #include #include #include -#include +#include #include #include struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; - struct rw_semaphore rw_sem; - wait_queue_head_t writer; + struct rw_semaphore rw_sem; /* slowpath */ + struct rcuwait writer; /* blocked writer */ int readers_block; }; @@ -22,7 +22,7 @@ static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ + .writer = __RCUWAIT_INITIALIZER(name.writer), \ } extern int __percpu_down_read(struct percpu_rw_semaphore *, int); diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index ce182599cf2e..883cf1b92d90 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -18,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); __init_rwsem(&sem->rw_sem, name, rwsem_key); - init_waitqueue_head(&sem->writer); + rcuwait_init(&sem->writer); sem->readers_block = 0; return 0; } @@ -103,7 +102,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem) __this_cpu_dec(*sem->read_count); /* Prod writer to recheck readers_active */ - wake_up(&sem->writer); + rcuwait_wake_up(&sem->writer); } EXPORT_SYMBOL_GPL(__percpu_up_read); @@ -160,7 +159,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem) */ /* Wait for all now active readers to complete. */ - wait_event(sem->writer, readers_active_check(sem)); + rcuwait_wait_event(&sem->writer, readers_active_check(sem)); } EXPORT_SYMBOL_GPL(percpu_down_write); -- cgit v1.2.3 From e274795ea7b7caa0fd74ef651594382a69e2a951 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Jan 2017 14:17:48 +0100 Subject: locking/mutex: Fix mutex handoff While reviewing the ww_mutex patches, I noticed that it was still possible to (incorrectly) succeed for (incorrect) code like: mutex_lock(&a); mutex_lock(&a); This was possible if the second mutex_lock() would block (as expected) but then receive a spurious wakeup. At that point it would find itself at the front of the queue, request a handoff and instantly claim ownership and continue, since owner would point to itself. Avoid this scenario and simplify the code by introducing a third low bit to signal handoff pickup. So once we request handoff, unlock clears the handoff bit and sets the pickup bit along with the new owner. This also removes the need for the .handoff argument to __mutex_trylock(), since that becomes superfluous with PICKUP. In order to guarantee enough low bits, ensure task_struct alignment is at least L1_CACHE_BYTES (which seems a good ideal regardless). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 9d659ae14b54 ("locking/mutex: Add lock handoff to avoid starvation") Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- kernel/fork.c | 6 ++- kernel/locking/mutex.c | 108 ++++++++++++++++++++++++------------------------- 3 files changed, 57 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index b97870f2debd..3e1fccb47f11 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -65,7 +65,7 @@ struct mutex { static inline struct task_struct *__mutex_owner(struct mutex *lock) { - return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07); } /* diff --git a/kernel/fork.c b/kernel/fork.c index 11c5c8ab827c..a90510d0bbf8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -432,11 +432,13 @@ void __init fork_init(void) int i; #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN -#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES +#define ARCH_MIN_TASKALIGN 0 #endif + int align = min_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); + /* create a slab on which task_structs can be allocated */ task_struct_cachep = kmem_cache_create("task_struct", - arch_task_struct_size, ARCH_MIN_TASKALIGN, + arch_task_struct_size, align, SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); #endif diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 97d142486f93..24284497c425 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init); /* * @owner: contains: 'struct task_struct *' to the current lock owner, * NULL means not owned. Since task_struct pointers are aligned at - * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low - * bits to store extra state. + * at least L1_CACHE_BYTES, we have low bits to store extra state. * * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. * Bit1 indicates unlock needs to hand the lock to the top-waiter + * Bit2 indicates handoff has been done and we're waiting for pickup. */ #define MUTEX_FLAG_WAITERS 0x01 #define MUTEX_FLAG_HANDOFF 0x02 +#define MUTEX_FLAG_PICKUP 0x04 -#define MUTEX_FLAGS 0x03 +#define MUTEX_FLAGS 0x07 static inline struct task_struct *__owner_task(unsigned long owner) { @@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner) } /* - * Actual trylock that will work on any unlocked state. - * - * When setting the owner field, we must preserve the low flag bits. - * - * Be careful with @handoff, only set that in a wait-loop (where you set - * HANDOFF) to avoid recursive lock attempts. + * Trylock variant that retuns the owning task on failure. */ -static inline bool __mutex_trylock(struct mutex *lock, const bool handoff) +static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) { unsigned long owner, curr = (unsigned long)current; owner = atomic_long_read(&lock->owner); for (;;) { /* must loop, can race against a flag */ unsigned long old, flags = __owner_flags(owner); + unsigned long task = owner & ~MUTEX_FLAGS; + + if (task) { + if (likely(task != curr)) + break; + + if (likely(!(flags & MUTEX_FLAG_PICKUP))) + break; - if (__owner_task(owner)) { - if (handoff && unlikely(__owner_task(owner) == current)) { - /* - * Provide ACQUIRE semantics for the lock-handoff. - * - * We cannot easily use load-acquire here, since - * the actual load is a failed cmpxchg, which - * doesn't imply any barriers. - * - * Also, this is a fairly unlikely scenario, and - * this contains the cost. - */ - smp_mb(); /* ACQUIRE */ - return true; - } - - return false; + flags &= ~MUTEX_FLAG_PICKUP; + } else { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP); +#endif } /* @@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff) * past the point where we acquire it. This would be possible * if we (accidentally) set the bit on an unlocked mutex. */ - if (handoff) - flags &= ~MUTEX_FLAG_HANDOFF; + flags &= ~MUTEX_FLAG_HANDOFF; old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags); if (old == owner) - return true; + return NULL; owner = old; } + + return __owner_task(owner); +} + +/* + * Actual trylock that will work on any unlocked state. + */ +static inline bool __mutex_trylock(struct mutex *lock) +{ + return !__mutex_trylock_or_owner(lock); } #ifndef CONFIG_DEBUG_LOCK_ALLOC @@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait /* * Give up ownership to a specific task, when @task = NULL, this is equivalent - * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE - * semantics like a regular unlock, the __mutex_trylock() provides matching - * ACQUIRE semantics for the handoff. + * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves + * WAITERS. Provides RELEASE semantics like a regular unlock, the + * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff. */ static void __mutex_handoff(struct mutex *lock, struct task_struct *task) { @@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task) #ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); + DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); #endif new = (owner & MUTEX_FLAG_WAITERS); new |= (unsigned long)task; + if (task) + new |= MUTEX_FLAG_PICKUP; old = atomic_long_cmpxchg_release(&lock->owner, owner, new); if (old == owner) @@ -435,8 +439,6 @@ static bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx, const bool waiter) { - struct task_struct *task = current; - if (!waiter) { /* * The purpose of the mutex_can_spin_on_owner() function is @@ -476,24 +478,17 @@ static bool mutex_optimistic_spin(struct mutex *lock, goto fail_unlock; } + /* Try to acquire the mutex... */ + owner = __mutex_trylock_or_owner(lock); + if (!owner) + break; + /* - * If there's an owner, wait for it to either + * There's an owner, wait for it to either * release the lock or go to sleep. */ - owner = __mutex_owner(lock); - if (owner) { - if (waiter && owner == task) { - smp_mb(); /* ACQUIRE */ - break; - } - - if (!mutex_spin_on_owner(lock, owner)) - goto fail_unlock; - } - - /* Try to acquire the mutex if it is unlocked. */ - if (__mutex_trylock(lock, waiter)) - break; + if (!mutex_spin_on_owner(lock, owner)) + goto fail_unlock; /* * The cpu_relax() call is a compiler barrier which forces @@ -637,7 +632,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); - if (__mutex_trylock(lock, false) || + if (__mutex_trylock(lock) || mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) { /* got the lock, yay! */ lock_acquired(&lock->dep_map, ip); @@ -651,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, /* * After waiting to acquire the wait_lock, try again. */ - if (__mutex_trylock(lock, false)) + if (__mutex_trylock(lock)) goto skip_wait; debug_mutex_lock_common(lock, &waiter); @@ -674,7 +669,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * before testing the error conditions to make sure we pick up * the handoff. */ - if (__mutex_trylock(lock, first)) + if (__mutex_trylock(lock)) goto acquired; /* @@ -707,8 +702,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * state back to RUNNING and fall through the next schedule(), * or we must see its unlock and acquire. */ - if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) || - __mutex_trylock(lock, first)) + if (__mutex_trylock(lock) || + (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true))) break; spin_lock_mutex(&lock->wait_lock, flags); @@ -865,6 +860,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne #ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); + DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); #endif if (owner & MUTEX_FLAG_HANDOFF) @@ -1003,7 +999,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, */ int __sched mutex_trylock(struct mutex *lock) { - bool locked = __mutex_trylock(lock, false); + bool locked = __mutex_trylock(lock); if (locked) mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -- cgit v1.2.3 From ea9e0fb8fe1bdfca81bd76052a5cce70bb053430 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Wed, 21 Dec 2016 19:46:32 +0100 Subject: locking/ww_mutex: Set use_ww_ctx even when locking without a context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add a new field to struct mutex_waiter. This field must be initialized for all waiters if any waiter uses the ww_use_ctx path. So there is a trade-off: Keep ww_mutex locking without a context on the faster non-use_ww_ctx path, at the cost of adding the initialization to all mutex locks (including non-ww_mutexes), or avoid the additional cost for non-ww_mutex locks, at the cost of adding additional checks to the use_ww_ctx path. We take the latter choice. It may be worth eliminating the users of ww_mutex_lock(lock, NULL), but there are a lot of them. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-5-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 11 ++--------- kernel/locking/mutex.c | 29 +++++++++++++++++------------ 2 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 7b0066814fa0..5f2e8379baff 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -222,11 +222,7 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, */ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - if (ctx) - return __ww_mutex_lock(lock, ctx); - - mutex_lock(&lock->base); - return 0; + return __ww_mutex_lock(lock, ctx); } /** @@ -262,10 +258,7 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - if (ctx) - return __ww_mutex_lock_interruptible(lock, ctx); - else - return mutex_lock_interruptible(&lock->base); + return __ww_mutex_lock_interruptible(lock, ctx); } /** diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 9ad03b9a5f7f..44a64c0a851a 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -469,7 +469,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, for (;;) { struct task_struct *owner; - if (use_ww_ctx && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); @@ -629,8 +629,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct ww_mutex *ww; int ret; - if (use_ww_ctx) { - ww = container_of(lock, struct ww_mutex, base); + ww = container_of(lock, struct ww_mutex, base); + + if (use_ww_ctx && ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; } @@ -642,7 +643,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) { /* got the lock, yay! */ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx) + if (use_ww_ctx && ww_ctx) ww_mutex_set_context_fastpath(ww, ww_ctx); preempt_enable(); return 0; @@ -688,7 +689,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, goto err; } - if (use_ww_ctx && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) { ret = __ww_mutex_lock_check_stamp(lock, ww_ctx); if (ret) goto err; @@ -728,7 +729,7 @@ skip_wait: /* got the lock - cleanup and rejoice! */ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx) + if (use_ww_ctx && ww_ctx) ww_mutex_set_context_slowpath(ww, ww_ctx); spin_unlock_mutex(&lock->wait_lock, flags); @@ -816,8 +817,9 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); - if (!ret && ctx->acquired > 1) + 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, + ctx, 1); + if (!ret && ctx && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); return ret; @@ -831,9 +833,10 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); + 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, + ctx, 1); - if (!ret && ctx->acquired > 1) + if (!ret && ctx && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); return ret; @@ -1021,7 +1024,8 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); if (__mutex_trylock_fast(&lock->base)) { - ww_mutex_set_context_fastpath(lock, ctx); + if (ctx) + ww_mutex_set_context_fastpath(lock, ctx); return 0; } @@ -1035,7 +1039,8 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); if (__mutex_trylock_fast(&lock->base)) { - ww_mutex_set_context_fastpath(lock, ctx); + if (ctx) + ww_mutex_set_context_fastpath(lock, ctx); return 0; } -- cgit v1.2.3 From c5470b22d1833241cae996d23a8a346ff8ec4d58 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Wed, 21 Dec 2016 19:46:33 +0100 Subject: locking/ww_mutex: Remove the __ww_mutex_lock*() inline wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the documentation in the header file since there is no good place for it in mutex.c: there are two rather different implementations with different EXPORT_SYMBOLs for each function. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-6-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 18 ++++-------------- kernel/locking/mutex.c | 16 ++++++++-------- 2 files changed, 12 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 5f2e8379baff..c07528522bbc 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -186,11 +186,6 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) #endif } -extern int __must_check __ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); -extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); - /** * ww_mutex_lock - acquire the w/w mutex * @lock: the mutex to be acquired @@ -220,10 +215,8 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - return __ww_mutex_lock(lock, ctx); -} +extern int __must_check ww_mutex_lock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible @@ -255,11 +248,8 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - return __ww_mutex_lock_interruptible(lock, ctx); -} +extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 44a64c0a851a..c696614a6b8b 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -811,7 +811,7 @@ ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) } int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; @@ -824,10 +824,10 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) return ret; } -EXPORT_SYMBOL_GPL(__ww_mutex_lock); +EXPORT_SYMBOL_GPL(ww_mutex_lock); int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; @@ -841,7 +841,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) return ret; } -EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); #endif @@ -1019,7 +1019,7 @@ EXPORT_SYMBOL(mutex_trylock); #ifndef CONFIG_DEBUG_LOCK_ALLOC int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { might_sleep(); @@ -1031,10 +1031,10 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) return __ww_mutex_lock_slowpath(lock, ctx); } -EXPORT_SYMBOL(__ww_mutex_lock); +EXPORT_SYMBOL(ww_mutex_lock); int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { might_sleep(); @@ -1046,7 +1046,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) return __ww_mutex_lock_interruptible_slowpath(lock, ctx); } -EXPORT_SYMBOL(__ww_mutex_lock_interruptible); +EXPORT_SYMBOL(ww_mutex_lock_interruptible); #endif -- cgit v1.2.3 From 6baa5c60a97d7f1a37a4b5ab5fb3a450e56e8b06 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Wed, 21 Dec 2016 19:46:34 +0100 Subject: locking/ww_mutex: Add waiters in stamp order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add regular waiters in stamp order. Keep adding waiters that have no context in FIFO order and take care not to starve them. While adding our task as a waiter, back off if we detect that there is a waiter with a lower stamp in front of us. Make sure to call lock_contended even when we back off early. For w/w mutexes, being first in the wait list is only stable when taking the lock without a context. Therefore, the purpose of the first flag is split into two: 'first' remains to indicate whether we want to spin optimistically, while 'handoff' indicates that we should be prepared to accept a handoff. For w/w locking with a context, we always accept handoffs after the first schedule(), to handle the following sequence of events: 1. Task #0 unlocks and hands off to Task #2 which is first in line 2. Task #1 adds itself in front of Task #2 3. Task #2 wakes up and must accept the handoff even though it is no longer first in line Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-7-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 3 ++ kernel/locking/mutex.c | 76 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3e1fccb47f11..e17942ffb3fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -20,6 +20,8 @@ #include #include +struct ww_acquire_ctx; + /* * Simple, straightforward mutexes with strict semantics: * @@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock) struct mutex_waiter { struct list_head list; struct task_struct *task; + struct ww_acquire_ctx *ww_ctx; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index c696614a6b8b..d0f7628b5a3d 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -615,6 +615,52 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) return 0; } +static inline int __sched +__ww_mutex_add_waiter(struct mutex_waiter *waiter, + struct mutex *lock, + struct ww_acquire_ctx *ww_ctx) +{ + struct mutex_waiter *cur; + struct list_head *pos; + + if (!ww_ctx) { + list_add_tail(&waiter->list, &lock->wait_list); + return 0; + } + + /* + * Add the waiter before the first waiter with a higher stamp. + * Waiters without a context are skipped to avoid starving + * them. + */ + pos = &lock->wait_list; + list_for_each_entry_reverse(cur, &lock->wait_list, list) { + if (!cur->ww_ctx) + continue; + + if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { + /* Back off immediately if necessary. */ + if (ww_ctx->acquired > 0) { +#ifdef CONFIG_DEBUG_MUTEXES + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock); + ww_ctx->contending_lock = ww; +#endif + return -EDEADLK; + } + + break; + } + + pos = &cur->list; + } + + list_add_tail(&waiter->list, pos); + return 0; +} + /* * Lock a mutex (possibly interruptible), slowpath: */ @@ -659,15 +705,25 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, debug_mutex_lock_common(lock, &waiter); debug_mutex_add_waiter(lock, &waiter, current); - /* add waiting tasks to the end of the waitqueue (FIFO): */ - list_add_tail(&waiter.list, &lock->wait_list); + lock_contended(&lock->dep_map, ip); + + if (!use_ww_ctx) { + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + } else { + /* Add in stamp order, waking up waiters that must back off. */ + ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); + if (ret) + goto err_early_backoff; + + waiter.ww_ctx = ww_ctx; + } + waiter.task = current; if (__mutex_waiter_is_first(lock, &waiter)) __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); - lock_contended(&lock->dep_map, ip); - set_current_state(state); for (;;) { /* @@ -698,9 +754,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, spin_unlock_mutex(&lock->wait_lock, flags); schedule_preempt_disabled(); - if (!first && __mutex_waiter_is_first(lock, &waiter)) { - first = true; - __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); + /* + * ww_mutex needs to always recheck its position since its waiter + * list is not FIFO ordered. + */ + if ((use_ww_ctx && ww_ctx) || !first) { + first = __mutex_waiter_is_first(lock, &waiter); + if (first) + __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); } set_current_state(state); @@ -739,6 +800,7 @@ skip_wait: err: __set_current_state(TASK_RUNNING); mutex_remove_waiter(lock, &waiter, current); +err_early_backoff: spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, 1, ip); -- cgit v1.2.3 From 977625a693283dbb35b2a3f674bc0237f7347348 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Wed, 21 Dec 2016 19:46:39 +0100 Subject: locking/mutex: Initialize mutex_waiter::ww_ctx with poison when debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Help catch cases where mutex_lock is used directly on w/w mutexes, which otherwise result in the w/w tasks reading uninitialized data. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-12-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/poison.h | 1 + kernel/locking/mutex.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/poison.h b/include/linux/poison.h index 51334edec506..a39540326417 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -80,6 +80,7 @@ /********** kernel/mutexes **********/ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 +#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA) /********** lib/flex_array.c **********/ #define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index cd8598aa0426..935116723a3d 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -785,6 +785,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (!use_ww_ctx) { /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); + +#ifdef CONFIG_DEBUG_MUTEXES + waiter.ww_ctx = MUTEX_POISON_WW_CTX; +#endif } else { /* Add in stamp order, waking up waiters that must back off. */ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); -- cgit v1.2.3 From 12907fbb1a691807bb0420a27126e15934cb7954 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Dec 2016 11:44:28 +0100 Subject: sched/clock, clocksource: Add optional cs::mark_unstable() method PeterZ reported that we'd fail to mark the TSC unstable when the clocksource watchdog finds it unsuitable. Allow a clocksource to run a custom action when its being marked unstable and hook up the TSC unstable code. Reported-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 11 +++++++++++ include/linux/clocksource.h | 3 +++ kernel/time/clocksource.c | 4 ++++ 3 files changed, 18 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index be3a49ee0356..c8174c815d83 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1106,6 +1106,16 @@ static u64 read_tsc(struct clocksource *cs) return (u64)rdtsc_ordered(); } +static void tsc_cs_mark_unstable(struct clocksource *cs) +{ + if (tsc_unstable) + return; + tsc_unstable = 1; + clear_sched_clock_stable(); + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to clocksource watchdog\n"); +} + /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ @@ -1118,6 +1128,7 @@ static struct clocksource clocksource_tsc = { CLOCK_SOURCE_MUST_VERIFY, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, + .mark_unstable = tsc_cs_mark_unstable, }; void mark_tsc_unstable(char *reason) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e315d04a2fd9..cfc75848a35d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -62,6 +62,8 @@ struct module; * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary + * @mark_unstable: Optional function to inform the clocksource driver that + * the watchdog marked the clocksource unstable * @owner: module reference, must be set by clocksource in modules * * Note: This struct is not used in hotpathes of the timekeeping code @@ -93,6 +95,7 @@ struct clocksource { unsigned long flags; void (*suspend)(struct clocksource *cs); void (*resume)(struct clocksource *cs); + void (*mark_unstable)(struct clocksource *cs); /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 665985b0a89a..93621ae718d3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -141,6 +141,10 @@ static void __clocksource_unstable(struct clocksource *cs) { cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; + + if (cs->mark_unstable) + cs->mark_unstable(cs); + if (finished_booting) schedule_work(&watchdog_work); } -- cgit v1.2.3 From 9881b024b7d7671f6a014091bc96506b89081802 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Dec 2016 13:35:52 +0100 Subject: sched/clock: Delay switching sched_clock to stable Currently we switch to the stable sched_clock if we guess the TSC is usable, and then switch back to the unstable path if it turns out TSC isn't stable during SMP bringup after all. Delay switching to the stable path until after SMP bringup is complete. This way we'll avoid switching during the time we detect the worst of the TSC offences. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +++++ init/main.c | 1 - kernel/sched/clock.c | 50 ++++++++++++++++++++++---------------------------- kernel/sched/core.c | 4 ++++ 4 files changed, 31 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index ad3ec9ec61f7..94a48bb58297 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2515,6 +2515,10 @@ extern u64 sched_clock_cpu(int cpu); extern void sched_clock_init(void); #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + static inline void sched_clock_tick(void) { } @@ -2537,6 +2541,7 @@ static inline u64 local_clock(void) return sched_clock(); } #else +extern void sched_clock_init_late(void); /* * Architectures can set this to 1 if they have specified * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, diff --git a/init/main.c b/init/main.c index b0c9d6facef9..19228149386c 100644 --- a/init/main.c +++ b/init/main.c @@ -625,7 +625,6 @@ asmlinkage __visible void __init start_kernel(void) numa_policy_init(); if (late_time_init) late_time_init(); - sched_clock_init(); calibrate_delay(); pidmap_init(); anon_vma_init(); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 5d6dd38b449c..b3466d4e0cc2 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -77,6 +77,11 @@ EXPORT_SYMBOL_GPL(sched_clock); __read_mostly int sched_clock_running; +void sched_clock_init(void) +{ + sched_clock_running = 1; +} + #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); static int __sched_clock_stable_early; @@ -96,12 +101,18 @@ void set_sched_clock_stable(void) { __sched_clock_stable_early = 1; - smp_mb(); /* matches sched_clock_init() */ - - if (!sched_clock_running) - return; + smp_mb(); /* matches sched_clock_init_late() */ - __set_sched_clock_stable(); + /* + * This really should only be called early (before + * sched_clock_init_late()) when guestimating our sched_clock() is + * solid. + * + * After that we test stability and we can negate our guess using + * clear_sched_clock_stable, possibly from a watchdog. + */ + if (WARN_ON_ONCE(sched_clock_running == 2)) + __set_sched_clock_stable(); } static void __clear_sched_clock_stable(struct work_struct *work) @@ -117,12 +128,10 @@ void clear_sched_clock_stable(void) { __sched_clock_stable_early = 0; - smp_mb(); /* matches sched_clock_init() */ - - if (!sched_clock_running) - return; + smp_mb(); /* matches sched_clock_init_late() */ - schedule_work(&sched_clock_work); + if (sched_clock_running == 2) + schedule_work(&sched_clock_work); } struct sched_clock_data { @@ -143,20 +152,9 @@ static inline struct sched_clock_data *cpu_sdc(int cpu) return &per_cpu(sched_clock_data, cpu); } -void sched_clock_init(void) +void sched_clock_init_late(void) { - u64 ktime_now = ktime_to_ns(ktime_get()); - int cpu; - - for_each_possible_cpu(cpu) { - struct sched_clock_data *scd = cpu_sdc(cpu); - - scd->tick_raw = 0; - scd->tick_gtod = ktime_now; - scd->clock = ktime_now; - } - - sched_clock_running = 1; + sched_clock_running = 2; /* * Ensure that it is impossible to not do a static_key update. @@ -362,11 +360,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ -void sched_clock_init(void) -{ - sched_clock_running = 1; -} - u64 sched_clock_cpu(int cpu) { if (unlikely(!sched_clock_running)) @@ -374,6 +367,7 @@ u64 sched_clock_cpu(int cpu) return sched_clock(); } + #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a129b34b8206..96a4267e6020 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7498,6 +7498,7 @@ void __init sched_init_smp(void) init_sched_dl_class(); sched_init_smt(); + sched_clock_init_late(); sched_smp_initialized = true; } @@ -7513,6 +7514,7 @@ early_initcall(migration_init); void __init sched_init_smp(void) { sched_init_granularity(); + sched_clock_init_late(); } #endif /* CONFIG_SMP */ @@ -7556,6 +7558,8 @@ void __init sched_init(void) int i, j; unsigned long alloc_size = 0, ptr; + sched_clock_init(); + for (i = 0; i < WAIT_TABLE_SIZE; i++) init_waitqueue_head(bit_wait_table + i); -- cgit v1.2.3 From 10ab56434f2f633a51e432ee8b7c29e12438e163 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Oct 2016 12:58:10 -0400 Subject: sched/core: Separate out io_schedule_prepare() and io_schedule_finish() Now that IO schedule accounting is done inside __schedule(), io_schedule() can be split into three steps - prep, schedule, and finish - where the schedule part doesn't need any special annotation. This allows marking a sleep as iowait by simply wrapping an existing blocking function with io_schedule_prepare() and io_schedule_finish(). Because task_struct->in_iowait is single bit, the caller of io_schedule_prepare() needs to record and the pass its state to io_schedule_finish() to be safe regarding nesting. While this isn't the prettiest, these functions are mostly gonna be used by core functions and we don't want to use more space for ->in_iowait. While at it, as it's simple to do now, reimplement io_schedule() without unnecessarily going through io_schedule_timeout(). Signed-off-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adilger.kernel@dilger.ca Cc: jack@suse.com Cc: kernel-team@fb.com Cc: mingbo@fb.com Cc: tytso@mit.edu Link: http://lkml.kernel.org/r/1477673892-28940-3-git-send-email-tj@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++----- kernel/sched/core.c | 33 ++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 94a48bb58297..a8daed914eef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -461,12 +461,10 @@ extern signed long schedule_timeout_idle(signed long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); +extern int __must_check io_schedule_prepare(void); +extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); - -static inline void io_schedule(void) -{ - io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); -} +extern void io_schedule(void); void __noreturn do_task_dead(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9fd37169b302..49ce1cb3d320 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5128,25 +5128,48 @@ out_irq: } EXPORT_SYMBOL_GPL(yield_to); +int io_schedule_prepare(void) +{ + int old_iowait = current->in_iowait; + + current->in_iowait = 1; + blk_schedule_flush_plug(current); + + return old_iowait; +} + +void io_schedule_finish(int token) +{ + current->in_iowait = token; +} + /* * This task is about to go to sleep on IO. Increment rq->nr_iowait so * that process accounting knows that this is a task in IO wait state. */ long __sched io_schedule_timeout(long timeout) { - int old_iowait = current->in_iowait; + int token; long ret; - current->in_iowait = 1; - blk_schedule_flush_plug(current); - + token = io_schedule_prepare(); ret = schedule_timeout(timeout); - current->in_iowait = old_iowait; + io_schedule_finish(token); return ret; } EXPORT_SYMBOL(io_schedule_timeout); +void io_schedule(void) +{ + int token; + + token = io_schedule_prepare(); + schedule(); + io_schedule_finish(token); +} +EXPORT_SYMBOL(io_schedule); + /** * sys_sched_get_priority_max - return maximum RT priority. * @policy: scheduling class. -- cgit v1.2.3 From 1460cb65a10f6c7a6e3a1c76513338861a0a43b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Oct 2016 12:58:11 -0400 Subject: locking/mutex, sched/wait: Add mutex_lock_io() We sometimes end up propagating IO blocking through mutexes; however, because there currently is no way of annotating mutex sleeps as iowait, there are cases where iowait and /proc/stat:procs_blocked report misleading numbers obscuring the actual state of the system. This patch adds mutex_lock_io() so that mutex sleeps can be marked as iowait in those cases. Signed-off-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adilger.kernel@dilger.ca Cc: jack@suse.com Cc: kernel-team@fb.com Cc: mingbo@fb.com Cc: tytso@mit.edu Link: http://lkml.kernel.org/r/1477673892-28940-4-git-send-email-tj@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 4 ++++ kernel/locking/mutex.c | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index b97870f2debd..980ba16b8749 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -156,10 +156,12 @@ extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); extern int __must_check mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass); +extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) +#define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0) #define mutex_lock_nest_lock(lock, nest_lock) \ do { \ @@ -171,11 +173,13 @@ do { \ extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); extern int __must_check mutex_lock_killable(struct mutex *lock); +extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) +# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock) #endif /* diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 9b349619f431..8464a5cbab97 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -783,6 +783,20 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); +void __sched +mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) +{ + int token; + + might_sleep(); + + token = io_schedule_prepare(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, + subclass, NULL, _RET_IP_, NULL, 0); + io_schedule_finish(token); +} +EXPORT_SYMBOL_GPL(mutex_lock_io_nested); + static inline int ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { @@ -950,6 +964,16 @@ int __sched mutex_lock_killable(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock_killable); +void __sched mutex_lock_io(struct mutex *lock) +{ + int token; + + token = io_schedule_prepare(); + mutex_lock(lock); + io_schedule_finish(token); +} +EXPORT_SYMBOL_GPL(mutex_lock_io); + static noinline void __sched __mutex_lock_slowpath(struct mutex *lock) { -- cgit v1.2.3 From 9e3d6223d2093a8903c8f570a06284453ee59944 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Dec 2016 09:30:11 +0100 Subject: math64, timers: Fix 32bit mul_u64_u32_shr() and friends It turns out that while GCC-4.4 manages to generate 32x32->64 mult instructions for the 32bit mul_u64_u32_shr() code, any GCC after that fails horribly. Fix this by providing an explicit mul_u32_u32() function which can be architcture provided. Reported-by: Chris Metcalf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf [for tile] Cc: Christopher S. Hall Cc: David Gibson Cc: John Stultz Cc: Laurent Vivier Cc: Liav Rehana Cc: Linus Torvalds Cc: Parit Bhargava Cc: Peter Zijlstra Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161209083011.GD15765@worktop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/tile/include/asm/Kbuild | 1 - arch/tile/include/asm/div64.h | 14 ++++++++++++++ arch/x86/include/asm/div64.h | 11 +++++++++++ include/linux/math64.h | 26 ++++++++++++++++++-------- 4 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 arch/tile/include/asm/div64.h (limited to 'include') diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 2d1f5638974c..20f2ba6d79be 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h -generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h new file mode 100644 index 000000000000..bf6161966dfa --- /dev/null +++ b/arch/tile/include/asm/div64.h @@ -0,0 +1,14 @@ +#ifndef _ASM_TILE_DIV64_H +#define _ASM_TILE_DIV64_H + +#ifdef __tilegx__ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return __insn_mul_lu_lu(a, b); +} +#define mul_u32_u32 mul_u32_u32 +#endif + +#include + +#endif /* _ASM_TILE_DIV64_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index ced283ac79df..af95c47d5c9e 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -59,6 +59,17 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#define mul_u32_u32 mul_u32_u32 + #else # include #endif /* CONFIG_X86_32 */ diff --git a/include/linux/math64.h b/include/linux/math64.h index 6e8b5b270ffe..80690c96c734 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -133,6 +133,16 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return ret; } +#ifndef mul_u32_u32 +/* + * Many a GCC version messes this up and generates a 64x64 mult :-( + */ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#endif + #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) #ifndef mul_u64_u32_shr @@ -160,9 +170,9 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) al = a; ah = a >> 32; - ret = ((u64)al * mul) >> shift; + ret = mul_u32_u32(al, mul) >> shift; if (ah) - ret += ((u64)ah * mul) << (32 - shift); + ret += mul_u32_u32(ah, mul) << (32 - shift); return ret; } @@ -186,10 +196,10 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) a0.ll = a; b0.ll = b; - rl.ll = (u64)a0.l.low * b0.l.low; - rm.ll = (u64)a0.l.low * b0.l.high; - rn.ll = (u64)a0.l.high * b0.l.low; - rh.ll = (u64)a0.l.high * b0.l.high; + rl.ll = mul_u32_u32(a0.l.low, b0.l.low); + rm.ll = mul_u32_u32(a0.l.low, b0.l.high); + rn.ll = mul_u32_u32(a0.l.high, b0.l.low); + rh.ll = mul_u32_u32(a0.l.high, b0.l.high); /* * Each of these lines computes a 64-bit intermediate result into "c", @@ -229,8 +239,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } u, rl, rh; u.ll = a; - rl.ll = (u64)u.l.low * mul; - rh.ll = (u64)u.l.high * mul + rl.l.high; + rl.ll = mul_u32_u32(u.l.low, mul); + rh.ll = mul_u32_u32(u.l.high, mul) + rl.l.high; /* Bits 32-63 of the result will be in rh.l.low. */ rl.l.high = do_div(rh.ll, divisor); -- cgit v1.2.3 From af2e859edd477fa1ea3d1d106f41a595cff3d162 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Dec 2016 11:47:04 +0000 Subject: locking/ww_mutex: Fix compilation of __WW_MUTEX_INITIALIZER From conflicting macro parameters, passing the wrong name to __MUTEX_INITIALIZER and a stray '\', #define __WW_MUTEX_INITIALIZER was very unhappy. One unnecessary change was to choose to pass &ww_class instead of implicitly taking the address of the class within the macro. Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1b375dc30710 ("mutex: Move ww_mutex definitions to ww_mutex.h") Link: http://lkml.kernel.org/r/20161201114711.28697-2-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index c07528522bbc..083950fd5b0b 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -51,10 +51,10 @@ struct ww_mutex { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \ - , .ww_class = &ww_class +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \ + , .ww_class = class #else -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) #endif #define __WW_CLASS_INITIALIZER(ww_class) \ @@ -63,7 +63,7 @@ struct ww_mutex { , .mutex_name = #ww_class "_mutex" } #define __WW_MUTEX_INITIALIZER(lockname, class) \ - { .base = { \__MUTEX_INITIALIZER(lockname) } \ + { .base = __MUTEX_INITIALIZER(lockname.base) \ __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } #define DEFINE_WW_CLASS(classname) \ -- cgit v1.2.3 From 1e24edca0557dba6486d39d3c24c288475432bcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:12:23 +0100 Subject: locking/atomic, kref: Add KREF_INIT() Since we need to change the implementation, stop exposing internals. Provide KREF_INIT() to allow static initialization of struct kref. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/block/drbd/drbd_bitmap.c | 2 +- fs/fuse/fuse_i.h | 2 +- include/linux/kref.h | 2 ++ init/version.c | 4 +--- kernel/pid.c | 4 +--- 5 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ab62b81c2ca7..dece26f119d4 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1070,7 +1070,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned .done = 0, .flags = flags, .error = 0, - .kref = { ATOMIC_INIT(2) }, + .kref = KREF_INIT(2), }; if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91307940c8ac..052f8d3c41cb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -256,7 +256,7 @@ struct fuse_io_priv { #define FUSE_IO_PRIV_SYNC(f) \ { \ - .refcnt = { ATOMIC_INIT(1) }, \ + .refcnt = KREF_INIT(1), \ .async = 0, \ .file = f, \ } diff --git a/include/linux/kref.h b/include/linux/kref.h index e15828fd71f1..9af255ad1e2f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -24,6 +24,8 @@ struct kref { atomic_t refcount; }; +#define KREF_INIT(n) { .refcount = ATOMIC_INIT(n), } + /** * kref_init - initialize object. * @kref: object in question. diff --git a/init/version.c b/init/version.c index fe41a63efed6..5606341e9efd 100644 --- a/init/version.c +++ b/init/version.c @@ -23,9 +23,7 @@ int version_string(LINUX_VERSION_CODE); #endif struct uts_namespace init_uts_ns = { - .kref = { - .refcount = ATOMIC_INIT(2), - }, + .kref = KREF_INIT(2), .name = { .sysname = UTS_SYSNAME, .nodename = UTS_NODENAME, diff --git a/kernel/pid.c b/kernel/pid.c index f66162f2359b..0291804151b5 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -68,9 +68,7 @@ static inline int mk_pid(struct pid_namespace *pid_ns, * the scheme scales to up to 4 million PIDs, runtime. */ struct pid_namespace init_pid_ns = { - .kref = { - .refcount = ATOMIC_INIT(2), - }, + .kref = KREF_INIT(2), .pidmap = { [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }, -- cgit v1.2.3 From 2c935bc57221cc2edc787c72ea0e2d30cdcd3d5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:29:48 +0100 Subject: locking/atomic, kref: Add kref_read() Since we need to change the implementation, stop exposing internals. Provide kref_read() to read the current reference count; typically used for debug messages. Kills two anti-patterns: atomic_read(&kref->refcount) kref->refcount.counter Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/rbd.c | 8 +++--- drivers/block/virtio_blk.c | 2 +- drivers/gpu/drm/drm_gem_cma_helper.c | 2 +- drivers/gpu/drm/drm_info.c | 2 +- drivers/gpu/drm/drm_mode_object.c | 4 +-- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/msm/msm_gem.c | 2 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- drivers/gpu/drm/omapdrm/omap_gem.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 4 +-- drivers/gpu/drm/ttm/ttm_object.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_cm.h | 6 ++--- drivers/infiniband/hw/cxgb3/iwch_qp.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 6 ++--- drivers/infiniband/hw/cxgb4/qp.c | 2 +- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 6 ++--- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 +-- drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/mei/debugfs.c | 2 +- drivers/pci/hotplug/pnv_php.c | 2 +- drivers/pci/slot.c | 2 +- drivers/scsi/bnx2fc/bnx2fc_io.c | 8 +++--- drivers/scsi/cxgbi/libcxgbi.h | 4 +-- drivers/scsi/lpfc/lpfc_debugfs.c | 2 +- drivers/scsi/lpfc/lpfc_els.c | 2 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 40 ++++++++++++++-------------- drivers/scsi/lpfc/lpfc_init.c | 3 +-- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 +-- drivers/staging/android/ion/ion.c | 2 +- drivers/staging/comedi/comedi_buf.c | 2 +- drivers/target/target_core_pr.c | 10 +++---- drivers/target/tcm_fc/tfc_sess.c | 2 +- drivers/usb/gadget/function/f_fs.c | 2 +- fs/exofs/sys.c | 2 +- fs/ocfs2/cluster/netdebug.c | 2 +- fs/ocfs2/cluster/tcp.c | 2 +- fs/ocfs2/dlm/dlmdebug.c | 12 ++++----- fs/ocfs2/dlm/dlmdomain.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 8 +++--- fs/ocfs2/dlm/dlmunlock.c | 2 +- include/drm/drm_framebuffer.h | 2 +- include/drm/ttm/ttm_bo_driver.h | 4 +-- include/linux/kref.h | 5 ++++ include/linux/sunrpc/cache.h | 2 +- include/net/bluetooth/hci_core.h | 4 +-- net/bluetooth/6lowpan.c | 2 +- net/bluetooth/a2mp.c | 4 +-- net/bluetooth/amp.c | 4 +-- net/bluetooth/l2cap_core.c | 4 +-- net/ceph/messenger.c | 4 +-- net/ceph/osd_client.c | 10 +++---- net/sunrpc/cache.c | 2 +- net/sunrpc/svc_xprt.c | 6 ++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 +-- 56 files changed, 121 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index de279fe4e4fd..74306c054983 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -520,7 +520,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, /* Completion does it's own kref_put. If we are going to * kref_sub below, we need req to be still around then. */ int at_least = k_put + !!c_put; - int refcount = atomic_read(&req->kref.refcount); + int refcount = kref_read(&req->kref); if (refcount < at_least) drbd_err(device, "mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n", diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 36d2b9f4e836..436baa66f701 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1535,7 +1535,7 @@ static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request) static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { dout("%s: obj %p (was %d)\n", __func__, obj_request, - atomic_read(&obj_request->kref.refcount)); + kref_read(&obj_request->kref)); kref_get(&obj_request->kref); } @@ -1544,14 +1544,14 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) { rbd_assert(obj_request != NULL); dout("%s: obj %p (was %d)\n", __func__, obj_request, - atomic_read(&obj_request->kref.refcount)); + kref_read(&obj_request->kref)); kref_put(&obj_request->kref, rbd_obj_request_destroy); } static void rbd_img_request_get(struct rbd_img_request *img_request) { dout("%s: img %p (was %d)\n", __func__, img_request, - atomic_read(&img_request->kref.refcount)); + kref_read(&img_request->kref)); kref_get(&img_request->kref); } @@ -1562,7 +1562,7 @@ static void rbd_img_request_put(struct rbd_img_request *img_request) { rbd_assert(img_request != NULL); dout("%s: img %p (was %d)\n", __func__, img_request, - atomic_read(&img_request->kref.refcount)); + kref_read(&img_request->kref)); if (img_request_child_test(img_request)) kref_put(&img_request->kref, rbd_parent_request_destroy); else diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5545a679abd8..79a346c97446 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -767,7 +767,7 @@ static void virtblk_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); - refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); + refc = kref_read(&disk_to_dev(vblk->disk)->kobj.kref); put_disk(vblk->disk); vdev->config->del_vqs(vdev); kfree(vblk->vqs); diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 1d6c335584ec..33cd51632721 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -376,7 +376,7 @@ void drm_gem_cma_describe(struct drm_gem_cma_object *cma_obj, off = drm_vma_node_start(&obj->vma_node); seq_printf(m, "%2d (%2d) %08llx %pad %p %zu", - obj->name, obj->refcount.refcount.counter, + obj->name, kref_read(&obj->refcount), off, &cma_obj->paddr, cma_obj->vaddr, obj->size); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index ffb2ab389d1d..6b68e9088436 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -118,7 +118,7 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data) seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, obj->handle_count, - atomic_read(&obj->refcount.refcount)); + kref_read(&obj->refcount)); return 0; } diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index 9f17085b1fdd..c6885a4911c0 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -159,7 +159,7 @@ EXPORT_SYMBOL(drm_mode_object_find); void drm_mode_object_unreference(struct drm_mode_object *obj) { if (obj->free_cb) { - DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount)); + DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount)); kref_put(&obj->refcount, obj->free_cb); } } @@ -176,7 +176,7 @@ EXPORT_SYMBOL(drm_mode_object_unreference); void drm_mode_object_reference(struct drm_mode_object *obj) { if (obj->free_cb) { - DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount)); + DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount)); kref_get(&obj->refcount); } } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 114dddbd297b..aa6e35ddc87f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -486,7 +486,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n", etnaviv_obj->flags, is_active(etnaviv_obj) ? 'A' : 'I', - obj->name, obj->refcount.refcount.counter, + obj->name, kref_read(&obj->refcount), off, etnaviv_obj->vaddr, obj->size); rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 6a368de9d81e..ecfefb9d42e4 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -256,7 +256,7 @@ extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); static inline bool i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) { - return atomic_read(&obj->base.refcount.refcount) == 0; + return kref_read(&obj->base.refcount) == 0; } static inline bool diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d8bc59c7e261..4d24d9389036 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -640,7 +640,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) seq_printf(m, "%08x: %c %2d (%2d) %08llx %p\t", msm_obj->flags, is_active(msm_obj) ? 'A' : 'I', - obj->name, obj->refcount.refcount.counter, + obj->name, kref_read(&obj->refcount), off, msm_obj->vaddr); for (id = 0; id < priv->num_aspaces; id++) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index a6126c93f215..88ee60d1b907 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -527,7 +527,7 @@ static bool nouveau_fence_no_signaling(struct dma_fence *f) * caller should have a reference on the fence, * else fence could get freed here */ - WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1); + WARN_ON(kref_read(&fence->base.refcount) <= 1); /* * This needs uevents to work correctly, but dma_fence_add_callback relies on diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 4a90c690f09e..74a9968df421 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1033,7 +1033,7 @@ void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m) off = drm_vma_node_start(&obj->vma_node); seq_printf(m, "%08x: %2d (%2d) %08llx %pad (%2d) %p %4d", - omap_obj->flags, obj->name, obj->refcount.refcount.counter, + omap_obj->flags, obj->name, kref_read(&obj->refcount), off, &omap_obj->paddr, omap_obj->paddr_cnt, omap_obj->vaddr, omap_obj->roll); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d5063618efa7..30aefcc0c969 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -140,8 +140,8 @@ static void ttm_bo_release_list(struct kref *list_kref) struct ttm_bo_device *bdev = bo->bdev; size_t acc_size = bo->acc_size; - BUG_ON(atomic_read(&bo->list_kref.refcount)); - BUG_ON(atomic_read(&bo->kref.refcount)); + BUG_ON(kref_read(&bo->list_kref)); + BUG_ON(kref_read(&bo->kref)); BUG_ON(atomic_read(&bo->cpu_writers)); BUG_ON(bo->mem.mm_node != NULL); BUG_ON(!list_empty(&bo->lru)); diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 4f5fa8d65fe9..fdb451e3ec01 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -304,7 +304,7 @@ bool ttm_ref_object_exists(struct ttm_object_file *tfile, * Verify that the ref->obj pointer was actually valid! */ rmb(); - if (unlikely(atomic_read(&ref->kref.refcount) == 0)) + if (unlikely(kref_read(&ref->kref) == 0)) goto out_false; rcu_read_unlock(); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index b9efadfffb4f..e66e75921797 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -55,14 +55,14 @@ #define put_ep(ep) { \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ - WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ + ep, kref_read(&((ep)->kref))); \ + WARN_ON(kref_read(&((ep)->kref)) < 1); \ kref_put(&((ep)->kref), __free_ep); \ } #define get_ep(ep) { \ PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ + ep, kref_read(&((ep)->kref))); \ kref_get(&((ep)->kref)); \ } diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index d939980a708f..a9194db7f9b8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -961,7 +961,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, case IWCH_QP_STATE_RTS: switch (attrs->next_state) { case IWCH_QP_STATE_CLOSING: - BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + BUG_ON(kref_read(&qhp->ep->com.kref) < 2); qhp->attr.state = IWCH_QP_STATE_CLOSING; if (!internal) { abort=0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4788e1a46fde..4dc141596e57 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -654,14 +654,14 @@ enum c4iw_mmid_state { #define c4iw_put_ep(ep) { \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ - WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ + ep, kref_read(&((ep)->kref))); \ + WARN_ON(kref_read(&((ep)->kref)) < 1); \ kref_put(&((ep)->kref), _c4iw_free_ep); \ } #define c4iw_get_ep(ep) { \ PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ + ep, kref_read(&((ep)->kref))); \ kref_get(&((ep)->kref)); \ } void _c4iw_free_ep(struct kref *kref); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cda5542e13a2..347b3c93ffd7 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1503,7 +1503,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + BUG_ON(kref_read(&qhp->ep->com.kref) < 2); t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 80ef3f8998c8..04443242e258 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -80,7 +80,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr, left = PAGE_SIZE; mutex_lock(&us_ibdev->usdev_lock); - if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) { + if (kref_read(&us_ibdev->vf_cnt) > 0) { char *busname; /* @@ -99,7 +99,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr, PCI_FUNC(us_ibdev->pdev->devfn), netdev_name(us_ibdev->netdev), us_ibdev->ufdev->mac, - atomic_read(&us_ibdev->vf_cnt.refcount)); + kref_read(&us_ibdev->vf_cnt)); UPDATE_PTR_LEFT(n, ptr, left); for (res_type = USNIC_VNIC_RES_TYPE_EOL; @@ -147,7 +147,7 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr, us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); return scnprintf(buf, PAGE_SIZE, "%u\n", - atomic_read(&us_ibdev->vf_cnt.refcount)); + kref_read(&us_ibdev->vf_cnt)); } static ssize_t diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 74819a7951e2..69df8e353123 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -291,11 +291,11 @@ int usnic_ib_query_device(struct ib_device *ibdev, qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); props->max_qp = qp_per_vf * - atomic_read(&us_ibdev->vf_cnt.refcount); + kref_read(&us_ibdev->vf_cnt); props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * - atomic_read(&us_ibdev->vf_cnt.refcount); + kref_read(&us_ibdev->vf_cnt); props->max_pd = USNIC_UIOM_MAX_PD_CNT; props->max_mr = USNIC_UIOM_MAX_MR_CNT; props->local_ca_ack_delay = 0; diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 7f1b282d7d96..cb290b8ca0c8 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -1396,7 +1396,7 @@ int genwqe_device_remove(struct genwqe_dev *cd) * application which will decrease this reference from * 1/unused to 0/illegal and not from 2/used 1/empty. */ - rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount); + rc = kref_read(&cd->cdev_genwqe.kobj.kref); if (rc != 1) { dev_err(&pci_dev->dev, "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc); diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c index c6c051b52f55..e32a92341b54 100644 --- a/drivers/misc/mei/debugfs.c +++ b/drivers/misc/mei/debugfs.c @@ -67,7 +67,7 @@ static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf, me_cl->props.max_number_of_connections, me_cl->props.max_msg_length, me_cl->props.single_recv_buf, - atomic_read(&me_cl->refcnt.refcount)); + kref_read(&me_cl->refcnt)); mei_me_cl_put(me_cl); } diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 56efaf72d08e..d2961ef39a3a 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -155,7 +155,7 @@ static void pnv_php_detach_device_nodes(struct device_node *parent) pnv_php_detach_device_nodes(dn); of_node_put(dn); - refcount = atomic_read(&dn->kobj.kref.refcount); + refcount = kref_read(&dn->kobj.kref); if (refcount != 1) pr_warn("Invalid refcount %d on <%s>\n", refcount, of_node_full_name(dn)); diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 429d34c348b9..e42909524dee 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -345,7 +345,7 @@ EXPORT_SYMBOL_GPL(pci_create_slot); void pci_destroy_slot(struct pci_slot *slot) { dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n", - slot->number, atomic_read(&slot->kobj.kref.refcount) - 1); + slot->number, kref_read(&slot->kobj.kref) - 1); mutex_lock(&pci_slot_mutex); kobject_put(&slot->kobj); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index f501095f91ac..898461b146cc 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -74,7 +74,7 @@ static void bnx2fc_cmd_timeout(struct work_struct *work) &io_req->req_flags)) { /* Handle internally generated ABTS timeout */ BNX2FC_IO_DBG(io_req, "ABTS timed out refcnt = %d\n", - io_req->refcount.refcount.counter); + kref_read(&io_req->refcount)); if (!(test_and_set_bit(BNX2FC_FLAG_ABTS_DONE, &io_req->req_flags))) { /* @@ -1141,7 +1141,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) return SUCCESS; } BNX2FC_IO_DBG(io_req, "eh_abort - refcnt = %d\n", - io_req->refcount.refcount.counter); + kref_read(&io_req->refcount)); /* Hold IO request across abort processing */ kref_get(&io_req->refcount); @@ -1299,7 +1299,7 @@ void bnx2fc_process_cleanup_compl(struct bnx2fc_cmd *io_req, { BNX2FC_IO_DBG(io_req, "Entered process_cleanup_compl " "refcnt = %d, cmd_type = %d\n", - io_req->refcount.refcount.counter, io_req->cmd_type); + kref_read(&io_req->refcount), io_req->cmd_type); bnx2fc_scsi_done(io_req, DID_ERROR); kref_put(&io_req->refcount, bnx2fc_cmd_release); if (io_req->wait_for_comp) @@ -1318,7 +1318,7 @@ void bnx2fc_process_abts_compl(struct bnx2fc_cmd *io_req, BNX2FC_IO_DBG(io_req, "Entered process_abts_compl xid = 0x%x" "refcnt = %d, cmd_type = %d\n", io_req->xid, - io_req->refcount.refcount.counter, io_req->cmd_type); + kref_read(&io_req->refcount), io_req->cmd_type); if (test_and_set_bit(BNX2FC_FLAG_ABTS_DONE, &io_req->req_flags)) { diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h index 95ba99044c3e..18e0ea83d361 100644 --- a/drivers/scsi/cxgbi/libcxgbi.h +++ b/drivers/scsi/cxgbi/libcxgbi.h @@ -301,7 +301,7 @@ static inline void __cxgbi_sock_put(const char *fn, struct cxgbi_sock *csk) { log_debug(1 << CXGBI_DBG_SOCK, "%s, put csk 0x%p, ref %u-1.\n", - fn, csk, atomic_read(&csk->refcnt.refcount)); + fn, csk, kref_read(&csk->refcnt)); kref_put(&csk->refcnt, cxgbi_sock_free); } #define cxgbi_sock_put(csk) __cxgbi_sock_put(__func__, csk) @@ -310,7 +310,7 @@ static inline void __cxgbi_sock_get(const char *fn, struct cxgbi_sock *csk) { log_debug(1 << CXGBI_DBG_SOCK, "%s, get csk 0x%p, ref %u+1.\n", - fn, csk, atomic_read(&csk->refcnt.refcount)); + fn, csk, kref_read(&csk->refcnt)); kref_get(&csk->refcnt); } #define cxgbi_sock_get(csk) __cxgbi_sock_get(__func__, csk) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index a63542bac153..caa7a7b0ec53 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -607,7 +607,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf(buf+len, size-len, "usgmap:%x ", ndlp->nlp_usg_map); len += snprintf(buf+len, size-len, "refcnt:%x", - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); len += snprintf(buf+len, size-len, "\n"); } spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 236e4e51d161..7a8829546068 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3688,7 +3688,7 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "0006 rpi%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); if (NLP_CHK_NODE_ACT(ndlp)) { lpfc_nlp_put(ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index ed223937798a..82047070cdc9 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -3440,7 +3440,7 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, "0002 rpi:%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; @@ -3861,7 +3861,7 @@ out: lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, "0003 rpi:%x DID:%x flg:%x %d map%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); if (vport->port_state < LPFC_VPORT_READY) { @@ -4238,7 +4238,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0277 lpfc_enable_node: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return NULL; } /* The ndlp should not already be in active mode */ @@ -4248,7 +4248,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0278 lpfc_enable_node: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return NULL; } @@ -4272,7 +4272,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0008 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); } @@ -4546,7 +4546,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == LPFC_SLI_INTF_IF_TYPE_2) && - (atomic_read(&ndlp->kref.refcount) > 0)) { + (kref_read(&ndlp->kref) > 0)) { mbox->context1 = lpfc_nlp_get(ndlp); mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr; @@ -4695,14 +4695,14 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "0280 lpfc_cleanup_node: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); lpfc_dequeue_node(vport, ndlp); } else { lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE, "0281 lpfc_cleanup_node: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); lpfc_disable_node(vport, ndlp); } @@ -4791,7 +4791,7 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0005 rpi:%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) != NULL) { @@ -5557,7 +5557,7 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, "0004 rpi:%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); /* * Start issuing Fabric-Device Management Interface (FDMI) command to @@ -5728,7 +5728,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0007 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); ndlp->active_rrqs_xri_bitmap = @@ -5767,7 +5767,7 @@ lpfc_nlp_release(struct kref *kref) "0279 lpfc_nlp_release: ndlp:x%p did %x " "usgmap:x%x refcnt:%d rpi:%x\n", (void *)ndlp, ndlp->nlp_DID, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount), ndlp->nlp_rpi); + kref_read(&ndlp->kref), ndlp->nlp_rpi); /* remove ndlp from action. */ lpfc_nlp_remove(ndlp->vport, ndlp); @@ -5804,7 +5804,7 @@ lpfc_nlp_get(struct lpfc_nodelist *ndlp) lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, "node get: did:x%x flg:x%x refcnt:x%x", ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); /* The check of ndlp usage to prevent incrementing the * ndlp reference count that is in the process of being * released. @@ -5817,7 +5817,7 @@ lpfc_nlp_get(struct lpfc_nodelist *ndlp) "0276 lpfc_nlp_get: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return NULL; } else kref_get(&ndlp->kref); @@ -5844,7 +5844,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, "node put: did:x%x flg:x%x refcnt:x%x", ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); phba = ndlp->phba; spin_lock_irqsave(&phba->ndlp_lock, flags); /* Check the ndlp memory free acknowledge flag to avoid the @@ -5857,7 +5857,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) "0274 lpfc_nlp_put: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return 1; } /* Check the ndlp inactivate log flag to avoid the possible @@ -5870,7 +5870,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) "0275 lpfc_nlp_put: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return 1; } /* For last put, mark the ndlp usage flags to make sure no @@ -5878,7 +5878,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) * in between the process when the final kref_put has been * invoked on this ndlp. */ - if (atomic_read(&ndlp->kref.refcount) == 1) { + if (kref_read(&ndlp->kref) == 1) { /* Indicate ndlp is put to inactive state. */ NLP_SET_IACT_REQ(ndlp); /* Acknowledge ndlp memory free has been seen. */ @@ -5906,8 +5906,8 @@ lpfc_nlp_not_used(struct lpfc_nodelist *ndlp) lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, "node not used: did:x%x flg:x%x refcnt:x%x", ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount)); - if (atomic_read(&ndlp->kref.refcount) == 1) + kref_read(&ndlp->kref)); + if (kref_read(&ndlp->kref) == 1) if (lpfc_nlp_put(ndlp)) return 1; return 0; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4776fd85514f..64717c171b15 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2660,8 +2660,7 @@ lpfc_cleanup(struct lpfc_vport *vport) "usgmap:x%x refcnt:%d\n", ndlp->nlp_DID, (void *)ndlp, ndlp->nlp_usg_map, - atomic_read( - &ndlp->kref.refcount)); + kref_read(&ndlp->kref)); } break; } diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 6643f6fc7795..dd28c69b6a92 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -371,7 +371,7 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) */ pr_debug("write_pending aborted cmd[%p] refcount %d " "transport_state %x, t_state %x, se_cmd_flags %x\n", - cmd,cmd->se_cmd.cmd_kref.refcount.counter, + cmd, kref_read(&cmd->se_cmd.cmd_kref), cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); @@ -584,7 +584,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) */ pr_debug("queue_data_in aborted cmd[%p] refcount %d " "transport_state %x, t_state %x, se_cmd_flags %x\n", - cmd,cmd->se_cmd.cmd_kref.refcount.counter, + cmd, kref_read(&cmd->se_cmd.cmd_kref), cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index b653451843c8..937c2d5d7ec3 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1300,7 +1300,7 @@ static int ion_debug_heap_show(struct seq_file *s, void *unused) seq_printf(s, "%16s %16u %16zu %d %d\n", buffer->task_comm, buffer->pid, buffer->size, buffer->kmap_cnt, - atomic_read(&buffer->ref.refcount)); + kref_read(&buffer->ref)); total_orphaned_size += buffer->size; } } diff --git a/drivers/staging/comedi/comedi_buf.c b/drivers/staging/comedi/comedi_buf.c index c7d7682b1412..1e1df89b5018 100644 --- a/drivers/staging/comedi/comedi_buf.c +++ b/drivers/staging/comedi/comedi_buf.c @@ -188,7 +188,7 @@ bool comedi_buf_is_mmapped(struct comedi_subdevice *s) { struct comedi_buf_map *bm = s->async->buf_map; - return bm && (atomic_read(&bm->refcount.refcount) > 1); + return bm && (kref_read(&bm->refcount) > 1); } int comedi_buf_alloc(struct comedi_device *dev, struct comedi_subdevice *s, diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index d761025144f9..e18051185846 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -788,7 +788,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * __core_scsi3_add_registration() */ dest_lun = rcu_dereference_check(deve_tmp->se_lun, - atomic_read(&deve_tmp->pr_kref.refcount) != 0); + kref_read(&deve_tmp->pr_kref) != 0); pr_reg_atp = __core_scsi3_do_alloc_registration(dev, nacl_tmp, dest_lun, deve_tmp, @@ -1463,7 +1463,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) * For nacl->dynamic_node_acl=1 */ lun_acl = rcu_dereference_check(se_deve->se_lun_acl, - atomic_read(&se_deve->pr_kref.refcount) != 0); + kref_read(&se_deve->pr_kref) != 0); if (!lun_acl) return 0; @@ -1478,7 +1478,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) * For nacl->dynamic_node_acl=1 */ lun_acl = rcu_dereference_check(se_deve->se_lun_acl, - atomic_read(&se_deve->pr_kref.refcount) != 0); + kref_read(&se_deve->pr_kref) != 0); if (!lun_acl) { kref_put(&se_deve->pr_kref, target_pr_kref_release); return; @@ -1759,7 +1759,7 @@ core_scsi3_decode_spec_i_port( * 2nd loop which will never fail. */ dest_lun = rcu_dereference_check(dest_se_deve->se_lun, - atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + kref_read(&dest_se_deve->pr_kref) != 0); dest_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl, dest_lun, dest_se_deve, @@ -3466,7 +3466,7 @@ after_iport_check: iport_ptr); if (!dest_pr_reg) { struct se_lun *dest_lun = rcu_dereference_check(dest_se_deve->se_lun, - atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + kref_read(&dest_se_deve->pr_kref) != 0); spin_unlock(&dev->dev_reservation_lock); if (core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl, diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index fd5c3de79470..c91979c1463d 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -454,7 +454,7 @@ static void ft_sess_free(struct kref *kref) void ft_sess_put(struct ft_sess *sess) { - int sess_held = atomic_read(&sess->kref.refcount); + int sess_held = kref_read(&sess->kref); BUG_ON(!sess_held); kref_put(&sess->kref, ft_sess_free); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 5e746adc8a2d..365443cae5b1 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3687,7 +3687,7 @@ static void ffs_closed(struct ffs_data *ffs) goto done; if (opts->no_configfs || !opts->func_inst.group.cg_item.ci_parent - || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount)) + || !kref_read(&opts->func_inst.group.cg_item.ci_kref)) goto done; ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index 5e6a2c0a1f0b..1f7d5e46cdda 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c @@ -122,7 +122,7 @@ void exofs_sysfs_dbg_print(void) list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) { printk(KERN_INFO "%s: name %s ref %d\n", __func__, kobject_name(k_name), - (int)atomic_read(&k_name->kref.refcount)); + (int)kref_read(&k_name->kref)); } #endif } diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 27d1242c8383..564c504d6efd 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -349,7 +349,7 @@ static void sc_show_sock_container(struct seq_file *seq, " func key: 0x%08x\n" " func type: %u\n", sc, - atomic_read(&sc->sc_kref.refcount), + kref_read(&sc->sc_kref), &saddr, inet ? ntohs(sport) : 0, &daddr, inet ? ntohs(dport) : 0, sc->sc_node->nd_name, diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d4b5c81f0445..ec000575e863 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -97,7 +97,7 @@ typeof(sc) __sc = (sc); \ mlog(ML_SOCKET, "[sc %p refs %d sock %p node %u page %p " \ "pg_off %zu] " fmt, __sc, \ - atomic_read(&__sc->sc_kref.refcount), __sc->sc_sock, \ + kref_read(&__sc->sc_kref), __sc->sc_sock, \ __sc->sc_node->nd_num, __sc->sc_page, __sc->sc_page_off , \ ##args); \ } while (0) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index e7b760deefae..9b984cae4c4e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -81,7 +81,7 @@ static void __dlm_print_lock(struct dlm_lock *lock) lock->ml.type, lock->ml.convert_type, lock->ml.node, dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount), + kref_read(&lock->lock_refs), (list_empty(&lock->ast_list) ? 'y' : 'n'), (lock->ast_pending ? 'y' : 'n'), (list_empty(&lock->bast_list) ? 'y' : 'n'), @@ -106,7 +106,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) printk("lockres: %s, owner=%u, state=%u\n", buf, res->owner, res->state); printk(" last used: %lu, refcnt: %u, on purge list: %s\n", - res->last_used, atomic_read(&res->refs.refcount), + res->last_used, kref_read(&res->refs), list_empty(&res->purge) ? "no" : "yes"); printk(" on dirty list: %s, on reco list: %s, " "migrating pending: %s\n", @@ -298,7 +298,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) mle_type, mle->master, mle->new_master, !list_empty(&mle->hb_events), !!mle->inuse, - atomic_read(&mle->mle_refs.refcount)); + kref_read(&mle->mle_refs)); out += snprintf(buf + out, len - out, "Maybe="); out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, @@ -494,7 +494,7 @@ static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) lock->ast_pending, lock->bast_pending, lock->convert_pending, lock->lock_pending, lock->cancel_pending, lock->unlock_pending, - atomic_read(&lock->lock_refs.refcount)); + kref_read(&lock->lock_refs)); spin_unlock(&lock->spinlock); return out; @@ -521,7 +521,7 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) !list_empty(&res->recovering), res->inflight_locks, res->migration_pending, atomic_read(&res->asts_reserved), - atomic_read(&res->refs.refcount)); + kref_read(&res->refs)); /* refmap */ out += snprintf(buf + out, len - out, "RMAP:"); @@ -777,7 +777,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) /* Purge Count: xxx Refs: xxx */ out += snprintf(buf + out, len - out, "Purge Count: %d Refs: %d\n", dlm->purge_count, - atomic_read(&dlm->dlm_refs.refcount)); + kref_read(&dlm->dlm_refs)); /* Dead Node: xxx */ out += snprintf(buf + out, len - out, diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 733e4e79c8e2..32fd261ae13d 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -2072,7 +2072,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, INIT_LIST_HEAD(&dlm->dlm_eviction_callbacks); mlog(0, "context init: refcount %u\n", - atomic_read(&dlm->dlm_refs.refcount)); + kref_read(&dlm->dlm_refs)); leave: if (ret < 0 && dlm) { diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a464c8088170..7025d8c27999 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -233,7 +233,7 @@ static void __dlm_put_mle(struct dlm_master_list_entry *mle) assert_spin_locked(&dlm->spinlock); assert_spin_locked(&dlm->master_lock); - if (!atomic_read(&mle->mle_refs.refcount)) { + if (!kref_read(&mle->mle_refs)) { /* this may or may not crash, but who cares. * it's a BUG. */ mlog(ML_ERROR, "bad mle: %p\n", mle); @@ -1124,9 +1124,9 @@ recheck: unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS); /* - if (atomic_read(&mle->mle_refs.refcount) < 2) + if (kref_read(&mle->mle_refs) < 2) mlog(ML_ERROR, "mle (%p) refs=%d, name=%.*s\n", mle, - atomic_read(&mle->mle_refs.refcount), + kref_read(&mle->mle_refs), res->lockname.len, res->lockname.name); */ atomic_set(&mle->woken, 0); @@ -1979,7 +1979,7 @@ ok: * on this mle. */ spin_lock(&dlm->master_lock); - rr = atomic_read(&mle->mle_refs.refcount); + rr = kref_read(&mle->mle_refs); if (mle->inuse > 0) { if (extra_ref && rr < 3) err = 1; diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 1082b2c3014b..63d701cd1e2e 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -251,7 +251,7 @@ leave: mlog(0, "lock %u:%llu should be gone now! refs=%d\n", dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount)-1); + kref_read(&lock->lock_refs)-1); dlm_lock_put(lock); } if (actions & DLM_UNLOCK_CALL_AST) diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 1ddfa2928802..a232e7f0c869 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -247,7 +247,7 @@ static inline void drm_framebuffer_unreference(struct drm_framebuffer *fb) */ static inline uint32_t drm_framebuffer_read_refcount(struct drm_framebuffer *fb) { - return atomic_read(&fb->base.refcount.refcount); + return kref_read(&fb->base.refcount); } /** diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cdbdb40eb5bd..feecf33a1212 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -878,7 +878,7 @@ static inline int ttm_bo_reserve(struct ttm_buffer_object *bo, { int ret; - WARN_ON(!atomic_read(&bo->kref.refcount)); + WARN_ON(!kref_read(&bo->kref)); ret = __ttm_bo_reserve(bo, interruptible, no_wait, ticket); if (likely(ret == 0)) @@ -903,7 +903,7 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, { int ret = 0; - WARN_ON(!atomic_read(&bo->kref.refcount)); + WARN_ON(!kref_read(&bo->kref)); if (interruptible) ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, diff --git a/include/linux/kref.h b/include/linux/kref.h index 9af255ad1e2f..7c88d865f82f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -35,6 +35,11 @@ static inline void kref_init(struct kref *kref) atomic_set(&kref->refcount, 1); } +static inline int kref_read(const struct kref *kref) +{ + return atomic_read(&kref->refcount); +} + /** * kref_get - increment refcount for object. * @kref: object. diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..8a511c0985aa 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -198,7 +198,7 @@ static inline struct cache_head *cache_get(struct cache_head *h) static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->ref.refcount) <= 2 && + if (kref_read(&h->ref) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; kref_put(&h->ref, cd->cache_put); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 554671c81f4a..90708f68cc02 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -987,7 +987,7 @@ static inline void hci_conn_drop(struct hci_conn *conn) static inline void hci_dev_put(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); put_device(&d->dev); } @@ -995,7 +995,7 @@ static inline void hci_dev_put(struct hci_dev *d) static inline struct hci_dev *hci_dev_hold(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); get_device(&d->dev); return d; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 1904a93f47d5..d491529332f4 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -920,7 +920,7 @@ static void chan_close_cb(struct l2cap_chan *chan) BT_DBG("dev %p removing %speer %p", dev, last ? "last " : "1 ", peer); BT_DBG("chan %p orig refcnt %d", chan, - atomic_read(&chan->kref.refcount)); + kref_read(&chan->kref)); l2cap_chan_put(chan); break; diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 5f123c3320a7..f0095fd79818 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -810,7 +810,7 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked) /* AMP Manager functions */ struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr) { - BT_DBG("mgr %p orig refcnt %d", mgr, atomic_read(&mgr->kref.refcount)); + BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref)); kref_get(&mgr->kref); @@ -833,7 +833,7 @@ static void amp_mgr_destroy(struct kref *kref) int amp_mgr_put(struct amp_mgr *mgr) { - BT_DBG("mgr %p orig refcnt %d", mgr, atomic_read(&mgr->kref.refcount)); + BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref)); return kref_put(&mgr->kref, &_mgr_destroy); } diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index e32f34189007..02a4ccc04e1e 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -24,7 +24,7 @@ void amp_ctrl_get(struct amp_ctrl *ctrl) { BT_DBG("ctrl %p orig refcnt %d", ctrl, - atomic_read(&ctrl->kref.refcount)); + kref_read(&ctrl->kref)); kref_get(&ctrl->kref); } @@ -42,7 +42,7 @@ static void amp_ctrl_destroy(struct kref *kref) int amp_ctrl_put(struct amp_ctrl *ctrl) { BT_DBG("ctrl %p orig refcnt %d", ctrl, - atomic_read(&ctrl->kref.refcount)); + kref_read(&ctrl->kref)); return kref_put(&ctrl->kref, &_ctrl_destroy); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ce0b5dd01953..fc7f321a3823 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -481,14 +481,14 @@ static void l2cap_chan_destroy(struct kref *kref) void l2cap_chan_hold(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); + BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); kref_get(&c->kref); } void l2cap_chan_put(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); + BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); kref_put(&c->kref, l2cap_chan_destroy); } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 770c52701efa..bad3d4ae43f6 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3425,7 +3425,7 @@ static void ceph_msg_release(struct kref *kref) struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) { dout("%s %p (was %d)\n", __func__, msg, - atomic_read(&msg->kref.refcount)); + kref_read(&msg->kref)); kref_get(&msg->kref); return msg; } @@ -3434,7 +3434,7 @@ EXPORT_SYMBOL(ceph_msg_get); void ceph_msg_put(struct ceph_msg *msg) { dout("%s %p (was %d)\n", __func__, msg, - atomic_read(&msg->kref.refcount)); + kref_read(&msg->kref)); kref_put(&msg->kref, ceph_msg_release); } EXPORT_SYMBOL(ceph_msg_put); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 842f049abb86..f3378ba1a828 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -438,7 +438,7 @@ static void ceph_osdc_release_request(struct kref *kref) void ceph_osdc_get_request(struct ceph_osd_request *req) { dout("%s %p (was %d)\n", __func__, req, - atomic_read(&req->r_kref.refcount)); + kref_read(&req->r_kref)); kref_get(&req->r_kref); } EXPORT_SYMBOL(ceph_osdc_get_request); @@ -447,7 +447,7 @@ void ceph_osdc_put_request(struct ceph_osd_request *req) { if (req) { dout("%s %p (was %d)\n", __func__, req, - atomic_read(&req->r_kref.refcount)); + kref_read(&req->r_kref)); kref_put(&req->r_kref, ceph_osdc_release_request); } } @@ -487,11 +487,11 @@ static void request_reinit(struct ceph_osd_request *req) struct ceph_msg *reply_msg = req->r_reply; dout("%s req %p\n", __func__, req); - WARN_ON(atomic_read(&req->r_kref.refcount) != 1); + WARN_ON(kref_read(&req->r_kref) != 1); request_release_checks(req); - WARN_ON(atomic_read(&request_msg->kref.refcount) != 1); - WARN_ON(atomic_read(&reply_msg->kref.refcount) != 1); + WARN_ON(kref_read(&request_msg->kref) != 1); + WARN_ON(kref_read(&reply_msg->kref) != 1); target_destroy(&req->r_t); request_init(req); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8147e8d56eb2..f39e3e11f9aa 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1358,7 +1358,7 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), - atomic_read(&cp->ref.refcount), cp->flags); + kref_read(&cp->ref), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3bc1d61694cb..04e7f8707d71 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -490,7 +490,7 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) svc_xprt_get(xprt); dprintk("svc: transport %p dequeued, inuse=%d\n", - xprt, atomic_read(&xprt->xpt_ref.refcount)); + xprt, kref_read(&xprt->xpt_ref)); } spin_unlock_bh(&pool->sp_lock); out: @@ -820,7 +820,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, rqstp->rq_pool->sp_id, xprt, - atomic_read(&xprt->xpt_ref.refcount)); + kref_read(&xprt->xpt_ref)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); @@ -978,7 +978,7 @@ static void svc_age_temp_xprts(unsigned long closure) * through, close it. */ if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) continue; - if (atomic_read(&xprt->xpt_ref.refcount) > 1 || + if (kref_read(&xprt->xpt_ref) > 1 || test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; list_del_init(le); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ca2799af05a6..39652d390a9c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1201,9 +1201,9 @@ static void __svc_rdma_free(struct work_struct *work) ib_drain_qp(rdma->sc_qp); /* We should only be called from kref_put */ - if (atomic_read(&xprt->xpt_ref.refcount) != 0) + if (kref_read(&xprt->xpt_ref) != 0) pr_err("svcrdma: sc_xprt still in use? (%d)\n", - atomic_read(&xprt->xpt_ref.refcount)); + kref_read(&xprt->xpt_ref)); /* * Destroy queued, but not processed read completions. Note -- cgit v1.2.3 From bdfafc4ffdd24e491119d81f85ddc4393fa49803 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:34:19 +0100 Subject: locking/atomic, kref: Kill kref_sub() By general sentiment kref_sub() is a bad interface, make it go away. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/block/drbd/drbd_main.c | 7 ++-- drivers/block/drbd/drbd_req.c | 31 ++++++------------ drivers/gpu/drm/ttm/ttm_bo.c | 59 +++++++++------------------------- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 4 +-- include/drm/ttm/ttm_bo_api.h | 15 +-------- include/linux/kref.h | 32 +++--------------- 6 files changed, 36 insertions(+), 112 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 83482721bc01..c3ff60c30dde 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2948,7 +2948,6 @@ void drbd_delete_device(struct drbd_device *device) struct drbd_resource *resource = device->resource; struct drbd_connection *connection; struct drbd_peer_device *peer_device; - int refs = 3; /* move to free_peer_device() */ for_each_peer_device(peer_device, device) @@ -2956,13 +2955,15 @@ void drbd_delete_device(struct drbd_device *device) drbd_debugfs_device_cleanup(device); for_each_connection(connection, resource) { idr_remove(&connection->peer_devices, device->vnr); - refs++; + kref_put(&device->kref, drbd_destroy_device); } idr_remove(&resource->devices, device->vnr); + kref_put(&device->kref, drbd_destroy_device); idr_remove(&drbd_devices, device_to_minor(device)); + kref_put(&device->kref, drbd_destroy_device); del_gendisk(device->vdisk); synchronize_rcu(); - kref_sub(&device->kref, refs, drbd_destroy_device); + kref_put(&device->kref, drbd_destroy_device); } static int __init drbd_init(void) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 74306c054983..b489ac2e9c44 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -421,7 +421,6 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, struct drbd_peer_device *peer_device = first_peer_device(device); unsigned s = req->rq_state; int c_put = 0; - int k_put = 0; if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP)) set |= RQ_COMPLETION_SUSP; @@ -437,6 +436,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, /* intent: get references */ + kref_get(&req->kref); + if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING)) atomic_inc(&req->completion_ref); @@ -473,15 +474,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) { D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING); - /* local completion may still come in later, - * we need to keep the req object around. */ - kref_get(&req->kref); ++c_put; } if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) { if (req->rq_state & RQ_LOCAL_ABORTED) - ++k_put; + kref_put(&req->kref, drbd_req_destroy); else ++c_put; list_del_init(&req->req_pending_local); @@ -503,7 +501,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (s & RQ_NET_SENT) atomic_sub(req->i.size >> 9, &device->ap_in_flight); if (s & RQ_EXP_BARR_ACK) - ++k_put; + kref_put(&req->kref, drbd_req_destroy); req->net_done_jif = jiffies; /* in ahead/behind mode, or just in case, @@ -516,25 +514,16 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, /* potentially complete and destroy */ - if (k_put || c_put) { - /* Completion does it's own kref_put. If we are going to - * kref_sub below, we need req to be still around then. */ - int at_least = k_put + !!c_put; - int refcount = kref_read(&req->kref); - if (refcount < at_least) - drbd_err(device, - "mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n", - s, req->rq_state, refcount, at_least); - } - /* If we made progress, retry conflicting peer requests, if any. */ if (req->i.waiting) wake_up(&device->misc_wait); - if (c_put) - k_put += drbd_req_put_completion_ref(req, m, c_put); - if (k_put) - kref_sub(&req->kref, k_put, drbd_req_destroy); + if (c_put) { + if (drbd_req_put_completion_ref(req, m, c_put)) + kref_put(&req->kref, drbd_req_destroy); + } else { + kref_put(&req->kref, drbd_req_destroy); + } } static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 30aefcc0c969..ffc6cb55c78c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -181,61 +181,46 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) } EXPORT_SYMBOL(ttm_bo_add_to_lru); -int ttm_bo_del_from_lru(struct ttm_buffer_object *bo) +static void ttm_bo_ref_bug(struct kref *list_kref) +{ + BUG(); +} + +void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; - int put_count = 0; if (bdev->driver->lru_removal) bdev->driver->lru_removal(bo); if (!list_empty(&bo->swap)) { list_del_init(&bo->swap); - ++put_count; + kref_put(&bo->list_kref, ttm_bo_ref_bug); } if (!list_empty(&bo->lru)) { list_del_init(&bo->lru); - ++put_count; + kref_put(&bo->list_kref, ttm_bo_ref_bug); } - - return put_count; -} - -static void ttm_bo_ref_bug(struct kref *list_kref) -{ - BUG(); -} - -void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, - bool never_free) -{ - kref_sub(&bo->list_kref, count, - (never_free) ? ttm_bo_ref_bug : ttm_bo_release_list); } void ttm_bo_del_sub_from_lru(struct ttm_buffer_object *bo) { - int put_count; - spin_lock(&bo->glob->lru_lock); - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); spin_unlock(&bo->glob->lru_lock); - ttm_bo_list_ref_sub(bo, put_count, true); } EXPORT_SYMBOL(ttm_bo_del_sub_from_lru); void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; - int put_count = 0; lockdep_assert_held(&bo->resv->lock.base); if (bdev->driver->lru_removal) bdev->driver->lru_removal(bo); - put_count = ttm_bo_del_from_lru(bo); - ttm_bo_list_ref_sub(bo, put_count, true); + ttm_bo_del_from_lru(bo); ttm_bo_add_to_lru(bo); } EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ -447,7 +432,6 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; struct ttm_bo_global *glob = bo->glob; - int put_count; int ret; spin_lock(&glob->lru_lock); @@ -455,13 +439,10 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) if (!ret) { if (!ttm_bo_wait(bo, false, true)) { - put_count = ttm_bo_del_from_lru(bo); - + ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - ttm_bo_list_ref_sub(bo, put_count, true); - return; } else ttm_bo_flush_all_fences(bo); @@ -504,7 +485,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, bool no_wait_gpu) { struct ttm_bo_global *glob = bo->glob; - int put_count; int ret; ret = ttm_bo_wait(bo, false, true); @@ -554,15 +534,13 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, return ret; } - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy); - ++put_count; + kref_put(&bo->list_kref, ttm_bo_ref_bug); spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - ttm_bo_list_ref_sub(bo, put_count, true); - return 0; } @@ -740,7 +718,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, struct ttm_bo_global *glob = bdev->glob; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; struct ttm_buffer_object *bo; - int ret = -EBUSY, put_count; + int ret = -EBUSY; spin_lock(&glob->lru_lock); list_for_each_entry(bo, &man->lru, lru) { @@ -771,13 +749,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, return ret; } - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); BUG_ON(ret != 0); - ttm_bo_list_ref_sub(bo, put_count, true); - ret = ttm_bo_evict(bo, interruptible, no_wait_gpu); ttm_bo_unreserve(bo); @@ -1669,7 +1645,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) container_of(shrink, struct ttm_bo_global, shrink); struct ttm_buffer_object *bo; int ret = -EBUSY; - int put_count; uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); spin_lock(&glob->lru_lock); @@ -1692,11 +1667,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) return ret; } - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); - ttm_bo_list_ref_sub(bo, put_count, true); - /** * Move to system cached */ diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index d35bc491e8de..5e1bcabffef5 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -48,9 +48,7 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list) list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - unsigned put_count = ttm_bo_del_from_lru(bo); - - ttm_bo_list_ref_sub(bo, put_count, true); + ttm_bo_del_from_lru(bo); } } diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 652e45be97c8..9a465314572c 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -332,19 +332,6 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo, */ extern void ttm_bo_unref(struct ttm_buffer_object **bo); - -/** - * ttm_bo_list_ref_sub - * - * @bo: The buffer object. - * @count: The number of references with which to decrease @bo::list_kref; - * @never_free: The refcount should not reach zero with this operation. - * - * Release @count lru list references to this buffer object. - */ -extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, - bool never_free); - /** * ttm_bo_add_to_lru * @@ -367,7 +354,7 @@ extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); * and is usually called just immediately after the bo has been reserved to * avoid recursive reservation from lru lists. */ -extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); +extern void ttm_bo_del_from_lru(struct ttm_buffer_object *bo); /** * ttm_bo_move_to_lru_tail diff --git a/include/linux/kref.h b/include/linux/kref.h index 7c88d865f82f..31c49a64cdf9 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -54,9 +54,8 @@ static inline void kref_get(struct kref *kref) } /** - * kref_sub - subtract a number of refcounts for object. + * kref_put - decrement refcount for object. * @kref: object. - * @count: Number of recounts to subtract. * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree @@ -65,46 +64,23 @@ static inline void kref_get(struct kref *kref) * maintainer, and anyone else who happens to notice it. You have * been warned. * - * Subtract @count from the refcount, and if 0, call release(). + * Decrement the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this * function returns 0, you still can not count on the kref from remaining in * memory. Only use the return value if you want to see if the kref is now * gone, not present. */ -static inline int kref_sub(struct kref *kref, unsigned int count, - void (*release)(struct kref *kref)) +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { WARN_ON(release == NULL); - if (atomic_sub_and_test((int) count, &kref->refcount)) { + if (atomic_dec_and_test(&kref->refcount)) { release(kref); return 1; } return 0; } -/** - * kref_put - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the - * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. If the caller does pass kfree to this - * function, you will be publicly mocked mercilessly by the kref - * maintainer, and anyone else who happens to notice it. You have - * been warned. - * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. - */ -static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) -{ - return kref_sub(kref, 1, release); -} - static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) -- cgit v1.2.3 From 23b19ec3377924eb8f303880102e056e9214ba72 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jan 2017 12:11:59 +0100 Subject: locking/ww_mutex: Turn off __must_check for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the ww_mutex inline wrappers gone there's a lot of dormant anti-patterns emerging in an x86 allyesconfig build: kernel/locking/test-ww_mutex.c:80:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:55:3: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:134:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:213:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:177:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:266:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/drm_modeset_lock.c:430:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/vgem/vgem_fence.c:193:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/i915/i915_gem_batch_pool.c:125:4: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/i915/i915_gem_execbuffer.c:1302:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/radeon/radeon_prime.c:69:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/nouveau/nouveau_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] ... but we cannot just litter the kernel build log with such warnings. These need to be fixed separately - turn off the warning for now. Cc: Nicolai Hähnle Cc: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 083950fd5b0b..5dd9a7682227 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -215,8 +215,7 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -extern int __must_check ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); +extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible -- cgit v1.2.3 From f21860bac05b609d71757338361d26209ff0759b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jan 2017 17:11:36 +0100 Subject: locking/mutex, sched/wait: Fix the mutex_lock_io_nested() define Mike noticed this bogosity: > > +# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock) > ^^^^^^^^^^^^^^ typo This new locking API is not used yet, so this didn't trigger in testing. Fix it. Reported-by: Mike Galbraith Cc: Tejun Heo Cc: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jens Axboe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adilger.kernel@dilger.ca Cc: jack@suse.com Cc: kernel-team@fb.com Cc: mingbo@fb.com Cc: tytso@mit.edu Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 980ba16b8749..7fffbfcd5430 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -179,7 +179,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock) +# define mutex_lock_io_nested(lock, subclass) mutex_lock(lock) #endif /* -- cgit v1.2.3 From 0a13cd1a05e7a549259d4f803d2ec2efda07ed7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 18:03:11 +0100 Subject: locking/atomic, kref: Implement kref_put_lock() Because home-rolling your own is _awesome_, stop doing it. Provide kref_put_lock(), just like kref_put_mutex() but for a spinlock. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kref.h | 22 ++++++++++++++++------ net/sunrpc/svcauth.c | 15 ++++++++++----- 2 files changed, 26 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/kref.h b/include/linux/kref.h index 31c49a64cdf9..9db9685fed84 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -19,6 +19,7 @@ #include #include #include +#include struct kref { atomic_t refcount; @@ -86,12 +87,21 @@ static inline int kref_put_mutex(struct kref *kref, struct mutex *lock) { WARN_ON(release == NULL); - if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) { - mutex_lock(lock); - if (unlikely(!atomic_dec_and_test(&kref->refcount))) { - mutex_unlock(lock); - return 0; - } + + if (atomic_dec_and_mutex_lock(&kref->refcount, lock)) { + release(kref); + return 1; + } + return 0; +} + +static inline int kref_put_lock(struct kref *kref, + void (*release)(struct kref *kref), + spinlock_t *lock) +{ + WARN_ON(release == NULL); + + if (atomic_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index e112da8005b5..bb8db3cb8032 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -126,13 +126,18 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister); static struct hlist_head auth_domain_table[DN_HASHMAX]; static DEFINE_SPINLOCK(auth_domain_lock); +static void auth_domain_release(struct kref *kref) +{ + struct auth_domain *dom = container_of(kref, struct auth_domain, ref); + + hlist_del(&dom->hash); + dom->flavour->domain_release(dom); + spin_unlock(&auth_domain_lock); +} + void auth_domain_put(struct auth_domain *dom) { - if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) { - hlist_del(&dom->hash); - dom->flavour->domain_release(dom); - spin_unlock(&auth_domain_lock); - } + kref_put_lock(&dom->ref, auth_domain_release, &auth_domain_lock); } EXPORT_SYMBOL_GPL(auth_domain_put); -- cgit v1.2.3 From 85b36c931ff328297572a3e6136fac573795ad79 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 18 Jan 2017 09:38:04 -0800 Subject: jump_labels: Move header guard #endif down where it belongs The ending header guard is misplaced. This has no functional change, this is just an eye-sore. Signed-off-by: Luis R. Rodriguez Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: bp@suse.de Cc: catalin.marinas@arm.com Cc: jbaron@akamai.com Cc: pbonzini@redhat.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/20170118173804.16281-1-mcgrof@kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a0547c571800..b63d6b7b0db0 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -402,6 +402,6 @@ extern bool ____wrong_branch_error(void); #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) -#endif /* _LINUX_JUMP_LABEL_H */ - #endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_JUMP_LABEL_H */ -- cgit v1.2.3 From acb04058de49458010c44bb35b849d45113fd668 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jan 2017 14:36:33 +0100 Subject: sched/clock: Fix hotplug crash Mike reported that he could trigger the WARN_ON_ONCE() in set_sched_clock_stable() using hotplug. This exposed a fundamental problem with the interface, we should never mark the TSC stable if we ever find it to be unstable. Therefore set_sched_clock_stable() is a broken interface. The reason it existed is that not having it is a pain, it means all relevant architecture code needs to call clear_sched_clock_stable() where appropriate. Of the three architectures that select HAVE_UNSTABLE_SCHED_CLOCK ia64 and parisc are trivial in that they never called set_sched_clock_stable(), so add an unconditional call to clear_sched_clock_stable() to them. For x86 the story is a lot more involved, and what this patch tries to do is ensure we preserve the status quo. So even is Cyrix or Transmeta have usable TSC they never called set_sched_clock_stable() so they now get an explicit mark unstable. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 9881b024b7d7 ("sched/clock: Delay switching sched_clock to stable") Link: http://lkml.kernel.org/r/20170119133633.GB6536@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/ia64/kernel/setup.c | 2 ++ arch/parisc/kernel/setup.c | 2 ++ arch/x86/kernel/cpu/amd.c | 6 ++++-- arch/x86/kernel/cpu/centaur.c | 6 ++++-- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/cyrix.c | 2 ++ arch/x86/kernel/cpu/intel.c | 6 ++++-- arch/x86/kernel/cpu/transmeta.c | 3 +++ arch/x86/kernel/kvmclock.c | 2 +- include/linux/sched.h | 1 - kernel/sched/clock.c | 29 ++++++++--------------------- 11 files changed, 33 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 7ec7acc844c2..c483ece3eb84 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -619,6 +619,8 @@ setup_arch (char **cmdline_p) check_sal_cache_flush(); #endif paging_init(); + + clear_sched_clock_stable(); } /* diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 2e66a887788e..068ed3607bac 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -36,6 +36,7 @@ #undef PCI_DEBUG #include #include +#include #include #include @@ -176,6 +177,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; /* we use do_take_over_console() later ! */ #endif + clear_sched_clock_stable(); } /* diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 71cae73a5076..1bb253a6ee4d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -548,8 +548,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 1661d8ec9280..2c234a6d94c4 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,5 @@ -#include -#include + +#include #include #include @@ -104,6 +104,8 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif + + clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dc1697ca5191..3457186275a0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -83,6 +83,7 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif + clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1055,6 +1056,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); + else + clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index bd9dcd6b712d..47416f959a48 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "cpu.h" @@ -183,6 +184,7 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } + clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcd484d2bb03..26eaff4907b2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -124,8 +124,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 34178564be2a..c1ea5b999839 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -14,6 +15,8 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } + + clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2a5cafdf8808..542710b99f52 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -107,12 +107,12 @@ static inline void kvm_sched_clock_init(bool stable) { if (!stable) { pv_time_ops.sched_clock = kvm_clock_read; + clear_sched_clock_stable(); return; } kvm_sched_clock_offset = kvm_clock_read(); pv_time_ops.sched_clock = kvm_sched_clock_read; - set_sched_clock_stable(); printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", kvm_sched_clock_offset); diff --git a/include/linux/sched.h b/include/linux/sched.h index a8daed914eef..69e6852fede1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2547,7 +2547,6 @@ extern void sched_clock_init_late(void); * is reliable after all: */ extern int sched_clock_stable(void); -extern void set_sched_clock_stable(void); extern void clear_sched_clock_stable(void); extern void sched_clock_tick(void); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 7713b2b53f61..ad64efe41722 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -83,8 +83,15 @@ void sched_clock_init(void) } #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +/* + * We must start with !__sched_clock_stable because the unstable -> stable + * transition is accurate, while the stable -> unstable transition is not. + * + * Similarly we start with __sched_clock_stable_early, thereby assuming we + * will become stable, such that there's only a single 1 -> 0 transition. + */ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); -static int __sched_clock_stable_early; +static int __sched_clock_stable_early = 1; /* * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset @@ -132,24 +139,6 @@ static void __set_sched_clock_stable(void) tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } -void set_sched_clock_stable(void) -{ - __sched_clock_stable_early = 1; - - smp_mb(); /* matches sched_clock_init_late() */ - - /* - * This really should only be called early (before - * sched_clock_init_late()) when guestimating our sched_clock() is - * solid. - * - * After that we test stability and we can negate our guess using - * clear_sched_clock_stable, possibly from a watchdog. - */ - if (WARN_ON_ONCE(sched_clock_running == 2)) - __set_sched_clock_stable(); -} - static void __clear_sched_clock_stable(struct work_struct *work) { struct sched_clock_data *scd = this_scd(); @@ -199,8 +188,6 @@ void sched_clock_init_late(void) if (__sched_clock_stable_early) __set_sched_clock_stable(); - else - __clear_sched_clock_stable(NULL); } /* -- cgit v1.2.3 From 06321dd2d1ae5b5bdc847958ab9e71d22a29a33e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 19 Jan 2017 09:31:52 -0500 Subject: locking/rwsem: Remove unnecessary atomic_long_t casts Since sem->count had been changed to a atomic_long_t type, it is no longer necessary to use the atomic_long_t cast anymore. So remove them. Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Arnd Bergmann Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/1484836312-6656-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/asm-generic/rwsem.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index 5be122e3d326..6c6a2141f271 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -33,7 +33,7 @@ */ static inline void __down_read(struct rw_semaphore *sem) { - if (unlikely(atomic_long_inc_return_acquire((atomic_long_t *)&sem->count) <= 0)) + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) rwsem_down_read_failed(sem); } @@ -58,7 +58,7 @@ static inline void __down_write(struct rw_semaphore *sem) long tmp; tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count); + &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) rwsem_down_write_failed(sem); } @@ -68,7 +68,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) long tmp; tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count); + &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; @@ -91,7 +91,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; - tmp = atomic_long_dec_return_release((atomic_long_t *)&sem->count); + tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) rwsem_wake(sem); } @@ -102,7 +102,7 @@ static inline void __up_read(struct rw_semaphore *sem) static inline void __up_write(struct rw_semaphore *sem) { if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count) < 0)) + &sem->count) < 0)) rwsem_wake(sem); } @@ -120,8 +120,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ - tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, - (atomic_long_t *)&sem->count); + tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); if (tmp < 0) rwsem_downgrade_wake(sem); } -- cgit v1.2.3 From 9f8197980d87a28ec3d0b3b986f770e7e7878485 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jan 2017 10:59:13 +0000 Subject: delay: Add explanation of udelay() inaccuracy There seems to be some misunderstanding that udelay() and friends will always guarantee the specified delay. This is a false understanding. When udelay() is based on CPU cycles, it can return early for many reasons which are detailed by Linus' reply to me in a thread in 2011: http://lists.openwall.net/linux-kernel/2011/01/12/372 However, a udelay test module was created in 2014 which allows udelay() to only be 0.5% fast, which is outside of the CPU-cycles udelay() results I measured back in 2011, which were deemed to be in the "we don't care" region. test_udelay() should be fixed to reflect the real allowable tolerance on udelay(), rather than 0.5%. Cc: David Riley Cc: John Stultz Signed-off-by: Russell King Signed-off-by: John Stultz --- include/linux/delay.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/delay.h b/include/linux/delay.h index a6ecb34cf547..2ecb3c46b20a 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -5,6 +5,17 @@ * Copyright (C) 1993 Linus Torvalds * * Delay routines, using a pre-computed "loops_per_jiffy" value. + * + * Please note that ndelay(), udelay() and mdelay() may return early for + * several reasons: + * 1. computed loops_per_jiffy too low (due to the time taken to + * execute the timer interrupt.) + * 2. cache behaviour affecting the time it takes to execute the + * loop function. + * 3. CPU clock rate changes. + * + * Please see this thread: + * http://lists.openwall.net/linux-kernel/2011/01/09/56 */ #include -- cgit v1.2.3 From bcc9a76d5ac426bc45c9e863b1830347827ca77a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Jan 2017 21:33:35 -0500 Subject: locking/rwsem: Reinit wake_q after use In __rwsem_down_write_failed_common(), the same wake_q variable name is defined twice, with the inner wake_q hiding the one in outer scope. We can either use different names for the two wake_q's. Even better, we can use the same wake_q twice, if necessary. To enable the latter change, we need to define a new helper function wake_q_init() to enable reinitalization of wake_q after use. Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1485052415-9611-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ kernel/locking/rwsem-xadd.c | 7 +++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f4f9d32f12b3..4e62b378bd65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1021,6 +1021,12 @@ struct wake_q_head { #define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index a3a381aee24b..2ad8d8dc3bb1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -502,8 +502,6 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * wake any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS) { - DEFINE_WAKE_Q(wake_q); - __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q); /* * The wakeup is normally called _after_ the wait_lock @@ -513,6 +511,11 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * for attempting rwsem_try_write_lock(). */ wake_up_q(&wake_q); + + /* + * Reinitialize wake_q after use. + */ + wake_q_init(&wake_q); } } else -- cgit v1.2.3 From b18b6a9cef7f30e9a8b7738d5fc8d568cf660855 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 21 Jan 2017 00:09:08 -0500 Subject: timers: Omit POSIX timer stuff from task_struct when disabled When CONFIG_POSIX_TIMERS is disabled, it is preferable to remove related structures from struct task_struct and struct signal_struct as they won't contain anything useful and shouldn't be relied upon by mistake. Code still referencing those structures is also disabled here. Signed-off-by: Nicolas Pitre Signed-off-by: John Stultz --- fs/proc/base.c | 4 ++-- include/linux/init_task.h | 40 +++++++++++++++++++++++++--------------- include/linux/sched.h | 13 ++++++++++--- kernel/fork.c | 10 +++++++++- kernel/sched/rt.c | 4 ++++ kernel/sched/stats.h | 32 ++++++++++++++++++++------------ 6 files changed, 70 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/proc/base.c b/fs/proc/base.c index 8e7e61b28f31..03deeac1e835 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2179,7 +2179,7 @@ static const struct file_operations proc_map_files_operations = { .llseek = generic_file_llseek, }; -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; @@ -2936,7 +2936,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 325f649d77ff..3a85d61f7614 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -42,6 +42,27 @@ extern struct fs_struct init_fs; #define INIT_PREV_CPUTIME(x) #endif +#ifdef CONFIG_POSIX_TIMERS +#define INIT_POSIX_TIMERS(s) \ + .posix_timers = LIST_HEAD_INIT(s.posix_timers), +#define INIT_CPU_TIMERS(s) \ + .cpu_timers = { \ + LIST_HEAD_INIT(s.cpu_timers[0]), \ + LIST_HEAD_INIT(s.cpu_timers[1]), \ + LIST_HEAD_INIT(s.cpu_timers[2]), \ + }, +#define INIT_CPUTIMER(s) \ + .cputimer = { \ + .cputime_atomic = INIT_CPUTIME_ATOMIC, \ + .running = false, \ + .checking_timer = false, \ + }, +#else +#define INIT_POSIX_TIMERS(s) +#define INIT_CPU_TIMERS(s) +#define INIT_CPUTIMER(s) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ @@ -49,14 +70,10 @@ extern struct fs_struct init_fs; .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ .signal = {{0}}}, \ - .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ - .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ + INIT_POSIX_TIMERS(sig) \ + INIT_CPU_TIMERS(sig) \ .rlim = INIT_RLIMITS, \ - .cputimer = { \ - .cputime_atomic = INIT_CPUTIME_ATOMIC, \ - .running = false, \ - .checking_timer = false, \ - }, \ + INIT_CPUTIMER(sig) \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ @@ -247,7 +264,7 @@ extern struct task_group root_task_group; .blocked = {{0}}, \ .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ .journal_info = NULL, \ - .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ + INIT_CPU_TIMERS(tsk) \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ .pids = { \ @@ -274,13 +291,6 @@ extern struct task_group root_task_group; } -#define INIT_CPU_TIMERS(cpu_timers) \ -{ \ - LIST_HEAD_INIT(cpu_timers[0]), \ - LIST_HEAD_INIT(cpu_timers[1]), \ - LIST_HEAD_INIT(cpu_timers[2]), \ -} - /* Attach to the init_task data structure for proper alignment */ #define __init_task_data __attribute__((__section__(".data..init_task"))) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d1905245c7a..e8f6af59726d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,13 +734,14 @@ struct signal_struct { unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; +#ifdef CONFIG_POSIX_TIMERS + /* POSIX.1b Interval Timers */ int posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; - struct pid *leader_pid; ktime_t it_real_incr; /* @@ -759,12 +760,16 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif - struct list_head cpu_timers[3]; - struct pid *tty_old_pgrp; /* boolean value for session group leader */ @@ -1681,8 +1686,10 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; +#ifdef CONFIG_POSIX_TIMERS struct task_cputime cputime_expires; struct list_head cpu_timers[3]; +#endif /* process credentials */ const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ diff --git a/kernel/fork.c b/kernel/fork.c index 11c5c8ab827c..105c6676d93b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1304,6 +1304,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) } } +#ifdef CONFIG_POSIX_TIMERS /* * Initialize POSIX timer handling for a thread group. */ @@ -1322,6 +1323,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) INIT_LIST_HEAD(&sig->cpu_timers[1]); INIT_LIST_HEAD(&sig->cpu_timers[2]); } +#else +static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { } +#endif static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { @@ -1346,11 +1350,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); - INIT_LIST_HEAD(&sig->posix_timers); seqlock_init(&sig->stats_lock); prev_cputime_init(&sig->prev_cputime); #ifdef CONFIG_POSIX_TIMERS + INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; #endif @@ -1425,6 +1429,7 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } +#ifdef CONFIG_POSIX_TIMERS /* * Initialize POSIX timer handling for a single task. */ @@ -1437,6 +1442,9 @@ static void posix_cpu_timers_init(struct task_struct *tsk) INIT_LIST_HEAD(&tsk->cpu_timers[1]); INIT_LIST_HEAD(&tsk->cpu_timers[2]); } +#else +static inline void posix_cpu_timers_init(struct task_struct *tsk) { } +#endif static inline void init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 2516b8df6dbb..a688a8206727 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2246,6 +2246,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) } } +#ifdef CONFIG_POSIX_TIMERS static void watchdog(struct rq *rq, struct task_struct *p) { unsigned long soft, hard; @@ -2267,6 +2268,9 @@ static void watchdog(struct rq *rq, struct task_struct *p) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; } } +#else +static inline void watchdog(struct rq *rq, struct task_struct *p) { } +#endif static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 34659a853505..c69a9870ab79 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -172,18 +172,19 @@ sched_info_switch(struct rq *rq, */ /** - * cputimer_running - return true if cputimer is running + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running * * @tsk: Pointer to target task. */ -static inline bool cputimer_running(struct task_struct *tsk) - +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; /* Check if cputimer isn't running. This is accessed without locking. */ if (!READ_ONCE(cputimer->running)) - return false; + return NULL; /* * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime @@ -200,10 +201,17 @@ static inline bool cputimer_running(struct task_struct *tsk) * clock delta is behind the expiring timer value. */ if (unlikely(!tsk->sighand)) - return false; + return NULL; - return true; + return cputimer; +} +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; } +#endif /** * account_group_user_time - Maintain utime for a thread group. @@ -218,9 +226,9 @@ static inline bool cputimer_running(struct task_struct *tsk) static inline void account_group_user_time(struct task_struct *tsk, cputime_t cputime) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.utime); @@ -239,9 +247,9 @@ static inline void account_group_user_time(struct task_struct *tsk, static inline void account_group_system_time(struct task_struct *tsk, cputime_t cputime) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.stime); @@ -260,9 +268,9 @@ static inline void account_group_system_time(struct task_struct *tsk, static inline void account_group_exec_runtime(struct task_struct *tsk, unsigned long long ns) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); -- cgit v1.2.3 From 058fe1c0440e68a1ba3c2270ae43e9f0298b27d8 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 18 Jan 2017 11:24:53 -0800 Subject: perf/core: Make cgroup switch visit only cpuctxs with cgroup events This patch follows from a conversation in CQM/CMT's last series about speeding up the context switch for cgroup events: https://patchwork.kernel.org/patch/9478617/ This is a low-hanging fruit optimization. It replaces the iteration over the "pmus" list in cgroup switch by an iteration over a new list that contains only cpuctxs with at least one cgroup event. This is necessary because the number of PMUs have increased over the years e.g modern x86 server systems have well above 50 PMUs. The iteration over the full PMU list is unneccessary and can be costly in heavy cache contention scenarios. Below are some instrumentation measurements with 10, 50 and 90 percentiles of the total cost of context switch before and after this optimization for a simple array read/write microbenchark. Contention Level Nr events Before (us) After (us) Median L2 L3 types (10%, 50%, 90%) (10%, 50%, 90% Speedup -------------------------------------------------------------------------- Low Low 1 (1.72, 2.42, 5.85) (1.35, 1.64, 5.46) 29% High Low 1 (2.08, 4.56, 19.8) (1720, 2.20, 13.7) 51% High High 1 (2.86, 10.4, 12.7) (2.54, 4.32, 12.1) 58% Low Low 2 (1.98, 3.20, 6.89) (1.68, 2.41, 8.89) 24% High Low 2 (2.48, 5.28, 22.4) (2150, 3.69, 14.6) 30% High High 2 (3.32, 8.09, 13.9) (2.80, 5.15, 13.7) 36% where: 1 event type = cycles 2 event types = cycles,intel_cqm/llc_occupancy/ Contention L2 Low: workset < L2 cache size. High: " >> L2 " " . Contention L3 Low: workset of task on all sockets < L3 cache size. High: " " " " " " >> L3 " " . Median Speedup is (50%ile Before - 50%ile After) / 50%ile Before Unsurprisingly, the benefits of this optimization decrease with the number of cpuctxs with a cgroup events, yet, is never detrimental. Tested-by: Mark Rutland Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vikas Shivappa Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170118192454.58008-2-davidcc@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 98 +++++++++++++++++++++------------------------- 2 files changed, 46 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 78ed8105e64d..dfa725723f28 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -788,6 +788,7 @@ struct perf_cpu_context { struct pmu *unique_pmu; #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; + struct list_head cgrp_cpuctx_entry; #endif struct list_head sched_cb_entry; diff --git a/kernel/events/core.c b/kernel/events/core.c index e5aaa806702d..928a818d912e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -678,6 +678,8 @@ perf_cgroup_set_timestamp(struct task_struct *task, info->timestamp = ctx->timestamp; } +static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); + #define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */ #define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */ @@ -690,61 +692,46 @@ perf_cgroup_set_timestamp(struct task_struct *task, static void perf_cgroup_switch(struct task_struct *task, int mode) { struct perf_cpu_context *cpuctx; - struct pmu *pmu; + struct list_head *list; unsigned long flags; /* - * disable interrupts to avoid geting nr_cgroup - * changes via __perf_event_disable(). Also - * avoids preemption. + * Disable interrupts and preemption to avoid this CPU's + * cgrp_cpuctx_entry to change under us. */ local_irq_save(flags); - /* - * we reschedule only in the presence of cgroup - * constrained events. - */ + list = this_cpu_ptr(&cgrp_cpuctx_list); + list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) { + WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); - list_for_each_entry_rcu(pmu, &pmus, entry) { - cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - if (cpuctx->unique_pmu != pmu) - continue; /* ensure we process each cpuctx once */ + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + perf_pmu_disable(cpuctx->ctx.pmu); - /* - * perf_cgroup_events says at least one - * context on this CPU has cgroup events. - * - * ctx->nr_cgroups reports the number of cgroup - * events for a context. - */ - if (cpuctx->ctx.nr_cgroups > 0) { - perf_ctx_lock(cpuctx, cpuctx->task_ctx); - perf_pmu_disable(cpuctx->ctx.pmu); - - if (mode & PERF_CGROUP_SWOUT) { - cpu_ctx_sched_out(cpuctx, EVENT_ALL); - /* - * must not be done before ctxswout due - * to event_filter_match() in event_sched_out() - */ - cpuctx->cgrp = NULL; - } + if (mode & PERF_CGROUP_SWOUT) { + cpu_ctx_sched_out(cpuctx, EVENT_ALL); + /* + * must not be done before ctxswout due + * to event_filter_match() in event_sched_out() + */ + cpuctx->cgrp = NULL; + } - if (mode & PERF_CGROUP_SWIN) { - WARN_ON_ONCE(cpuctx->cgrp); - /* - * set cgrp before ctxsw in to allow - * event_filter_match() to not have to pass - * task around - * we pass the cpuctx->ctx to perf_cgroup_from_task() - * because cgorup events are only per-cpu - */ - cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); - } - perf_pmu_enable(cpuctx->ctx.pmu); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); + if (mode & PERF_CGROUP_SWIN) { + WARN_ON_ONCE(cpuctx->cgrp); + /* + * set cgrp before ctxsw in to allow + * event_filter_match() to not have to pass + * task around + * we pass the cpuctx->ctx to perf_cgroup_from_task() + * because cgorup events are only per-cpu + */ + cpuctx->cgrp = perf_cgroup_from_task(task, + &cpuctx->ctx); + cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); } + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } local_irq_restore(flags); @@ -889,6 +876,7 @@ list_update_cgroup_event(struct perf_event *event, struct perf_event_context *ctx, bool add) { struct perf_cpu_context *cpuctx; + struct list_head *cpuctx_entry; if (!is_cgroup_event(event)) return; @@ -902,15 +890,16 @@ list_update_cgroup_event(struct perf_event *event, * this will always be called from the right CPU. */ cpuctx = __get_cpu_context(ctx); - - /* - * cpuctx->cgrp is NULL until a cgroup event is sched in or - * ctx->nr_cgroup == 0 . - */ - if (add && perf_cgroup_from_task(current, ctx) == event->cgrp) - cpuctx->cgrp = event->cgrp; - else if (!add) + cpuctx_entry = &cpuctx->cgrp_cpuctx_entry; + /* cpuctx->cgrp is NULL unless a cgroup event is active in this CPU .*/ + if (add) { + list_add(cpuctx_entry, this_cpu_ptr(&cgrp_cpuctx_list)); + if (perf_cgroup_from_task(current, ctx) == event->cgrp) + cpuctx->cgrp = event->cgrp; + } else { + list_del(cpuctx_entry); cpuctx->cgrp = NULL; + } } #else /* !CONFIG_CGROUP_PERF */ @@ -10709,6 +10698,9 @@ static void __init perf_event_init_all_cpus(void) INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu)); raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu)); +#ifdef CONFIG_CGROUP_PERF + INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu)); +#endif INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu)); } } -- cgit v1.2.3 From 1fd7e416995401ec082fc0fe6090a223969beda5 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 18 Jan 2017 11:24:54 -0800 Subject: perf/core: Remove perf_cpu_context::unique_pmu cpuctx->unique_pmu was originally introduced as a way to identify cpuctxs with shared pmus in order to avoid visiting the same cpuctx more than once in a for_each_pmu loop. cpuctx->unique_pmu == cpuctx->pmu in non-software task contexts since they have only one pmu per cpuctx. Since perf_pmu_sched_task() is only called in hw contexts, this patch replaces cpuctx->unique_pmu by cpuctx->pmu in it. The change above, together with the previous patch in this series, removed the remaining uses of cpuctx->unique_pmu, so we remove it altogether. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vikas Shivappa Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170118192454.58008-3-davidcc@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - kernel/events/core.c | 31 +------------------------------ 2 files changed, 1 insertion(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index dfa725723f28..5c58e93c130c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -785,7 +785,6 @@ struct perf_cpu_context { ktime_t hrtimer_interval; unsigned int hrtimer_active; - struct pmu *unique_pmu; #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; struct list_head cgrp_cpuctx_entry; diff --git a/kernel/events/core.c b/kernel/events/core.c index 928a818d912e..d92c7ad7715d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2932,7 +2932,7 @@ static void perf_pmu_sched_task(struct task_struct *prev, return; list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) { - pmu = cpuctx->unique_pmu; /* software PMUs will not have sched_task */ + pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */ if (WARN_ON_ONCE(!pmu->sched_task)) continue; @@ -8636,37 +8636,10 @@ static struct perf_cpu_context __percpu *find_pmu_context(int ctxn) return NULL; } -static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct perf_cpu_context *cpuctx; - - cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); - - if (cpuctx->unique_pmu == old_pmu) - cpuctx->unique_pmu = pmu; - } -} - static void free_pmu_context(struct pmu *pmu) { - struct pmu *i; - mutex_lock(&pmus_lock); - /* - * Like a real lame refcount. - */ - list_for_each_entry(i, &pmus, entry) { - if (i->pmu_cpu_context == pmu->pmu_cpu_context) { - update_pmu_context(i, pmu); - goto out; - } - } - free_percpu(pmu->pmu_cpu_context); -out: mutex_unlock(&pmus_lock); } @@ -8870,8 +8843,6 @@ skip_type: cpuctx->ctx.pmu = pmu; __perf_mux_hrtimer_init(cpuctx, cpu); - - cpuctx->unique_pmu = pmu; } got_cpu_context: -- cgit v1.2.3