summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/locking/mutex-design.rst2
-rw-r--r--arch/arm/include/asm/percpu.h2
-rw-r--r--arch/arm/include/asm/thread_info.h5
-rw-r--r--arch/powerpc/include/asm/dtl.h52
-rw-r--r--arch/powerpc/include/asm/lppaca.h44
-rw-r--r--arch/powerpc/include/asm/paca.h2
-rw-r--r--arch/powerpc/kernel/time.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c1
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c1
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c1
-rw-r--r--arch/powerpc/platforms/pseries/svm.c1
-rw-r--r--arch/s390/include/asm/smp.h1
-rw-r--r--arch/s390/include/asm/thread_info.h1
-rw-r--r--arch/sparc/include/asm/percpu_64.h2
-rw-r--r--arch/sparc/include/asm/trap_block.h2
-rw-r--r--arch/x86/entry/common.c88
-rw-r--r--arch/x86/include/asm/idtentry.h31
-rw-r--r--arch/x86/kernel/kvm.c6
-rw-r--r--arch/x86/kernel/nmi.c9
-rw-r--r--arch/x86/kernel/traps.c23
-rw-r--r--arch/x86/mm/fault.c6
-rw-r--r--include/linux/hardirq.h28
-rw-r--r--include/linux/irqflags.h23
-rw-r--r--include/linux/lockdep.h211
-rw-r--r--include/linux/lockdep_types.h194
-rw-r--r--include/linux/rwsem.h20
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/spinlock.h1
-rw-r--r--include/linux/spinlock_types.h2
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/futex.c114
-rw-r--r--kernel/kcsan/core.c5
-rw-r--r--kernel/kcsan/report.c9
-rw-r--r--kernel/locking/lockdep.c80
-rw-r--r--kernel/softirq.c8
-rw-r--r--tools/include/linux/irqflags.h4
37 files changed, 531 insertions, 457 deletions
diff --git a/Documentation/locking/mutex-design.rst b/Documentation/locking/mutex-design.rst
index 4d8236b81fa5..8f3e9a5141f9 100644
--- a/Documentation/locking/mutex-design.rst
+++ b/Documentation/locking/mutex-design.rst
@@ -18,7 +18,7 @@ as an alternative to these. This new data structure provided a number
of advantages, including simpler interfaces, and at that time smaller
code (see Disadvantages).
-[1] http://lwn.net/Articles/164802/
+[1] https://lwn.net/Articles/164802/
Implementation
--------------
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index f44f448537f2..e2fcb3cfd3de 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -5,6 +5,8 @@
#ifndef _ASM_ARM_PERCPU_H_
#define _ASM_ARM_PERCPU_H_
+register unsigned long current_stack_pointer asm ("sp");
+
/*
* Same as asm-generic/percpu.h, except that we store the per cpu offset
* in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 3609a6980c34..536b6b979f63 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -76,11 +76,6 @@ struct thread_info {
}
/*
- * how to get the current stack pointer in C
- */
-register unsigned long current_stack_pointer asm ("sp");
-
-/*
* how to get the thread information struct from C
*/
static inline struct thread_info *current_thread_info(void) __attribute_const__;
diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h
new file mode 100644
index 000000000000..1625888f27ef
--- /dev/null
+++ b/arch/powerpc/include/asm/dtl.h
@@ -0,0 +1,52 @@
+#ifndef _ASM_POWERPC_DTL_H
+#define _ASM_POWERPC_DTL_H
+
+#include <asm/lppaca.h>
+#include <linux/spinlock_types.h>
+
+/*
+ * Layout of entries in the hypervisor's dispatch trace log buffer.
+ */
+struct dtl_entry {
+ u8 dispatch_reason;
+ u8 preempt_reason;
+ __be16 processor_id;
+ __be32 enqueue_to_dispatch_time;
+ __be32 ready_to_enqueue_time;
+ __be32 waiting_to_ready_time;
+ __be64 timebase;
+ __be64 fault_addr;
+ __be64 srr0;
+ __be64 srr1;
+};
+
+#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
+#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+
+/*
+ * Dispatch trace log event enable mask:
+ * 0x1: voluntary virtual processor waits
+ * 0x2: time-slice preempts
+ * 0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE 0x1
+#define DTL_LOG_PREEMPT 0x2
+#define DTL_LOG_FAULT 0x4
+#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
+extern struct kmem_cache *dtl_cache;
+extern rwlock_t dtl_access_lock;
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log. If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
+extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif /* _ASM_POWERPC_DTL_H */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 3b4b305796ae..c390ec377bae 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -42,7 +42,6 @@
*/
#include <linux/cache.h>
#include <linux/threads.h>
-#include <linux/spinlock_types.h>
#include <asm/types.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
@@ -146,49 +145,6 @@ struct slb_shadow {
} save_area[SLB_NUM_BOLTED];
} ____cacheline_aligned;
-/*
- * Layout of entries in the hypervisor's dispatch trace log buffer.
- */
-struct dtl_entry {
- u8 dispatch_reason;
- u8 preempt_reason;
- __be16 processor_id;
- __be32 enqueue_to_dispatch_time;
- __be32 ready_to_enqueue_time;
- __be32 waiting_to_ready_time;
- __be64 timebase;
- __be64 fault_addr;
- __be64 srr0;
- __be64 srr1;
-};
-
-#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
-#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
-
-/*
- * Dispatch trace log event enable mask:
- * 0x1: voluntary virtual processor waits
- * 0x2: time-slice preempts
- * 0x4: virtual partition memory page faults
- */
-#define DTL_LOG_CEDE 0x1
-#define DTL_LOG_PREEMPT 0x2
-#define DTL_LOG_FAULT 0x4
-#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
-
-extern struct kmem_cache *dtl_cache;
-extern rwlock_t dtl_access_lock;
-
-/*
- * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
- * reading from the dispatch trace log. If other code wants to consume
- * DTL entries, it can set this pointer to a function that will get
- * called once for each DTL entry that gets processed.
- */
-extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
-
-extern void register_dtl_buffer(int cpu);
-extern void alloc_dtl_buffers(unsigned long *time_limit);
extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
#endif /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 45a839a7c6cf..84b2564cf5a4 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -29,7 +29,6 @@
#include <asm/hmi.h>
#include <asm/cpuidle.h>
#include <asm/atomic.h>
-#include <asm/rtas-types.h>
#include <asm-generic/mmiowb_types.h>
@@ -53,6 +52,7 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
#define get_slb_shadow() (get_paca()->slb_shadow_ptr)
struct task_struct;
+struct rtas_args;
/*
* Defines the layout of the paca.
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6fcae436ae51..f85539ebb513 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -183,6 +183,8 @@ static inline unsigned long read_spurr(unsigned long tb)
#ifdef CONFIG_PPC_SPLPAR
+#include <asm/dtl.h>
+
/*
* Scan the dispatch trace log and count up the stolen time.
* Should be called with interrupts disabled.
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6bf66649ab92..ebb04f331ad3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -74,6 +74,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/kvm_book3s_uvmem.h>
#include <asm/ultravisor.h>
+#include <asm/dtl.h>
#include "book3s.h"
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index eab8aa293743..982f069e4c31 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -12,6 +12,7 @@
#include <asm/smp.h>
#include <linux/uaccess.h>
#include <asm/firmware.h>
+#include <asm/dtl.h>
#include <asm/lppaca.h>
#include <asm/debugfs.h>
#include <asm/plpar_wrappers.h>
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index fd26f3d21d7b..f71ff2c94efe 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -40,6 +40,7 @@
#include <asm/fadump.h>
#include <asm/asm-prototypes.h>
#include <asm/debugfs.h>
+#include <asm/dtl.h>
#include "pseries.h"
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 2db8469e475f..27094c872fd6 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -70,6 +70,7 @@
#include <asm/idle.h>
#include <asm/swiotlb.h>
#include <asm/svm.h>
+#include <asm/dtl.h>
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 40c0637203d5..e6d7a344d9f2 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -11,6 +11,7 @@
#include <asm/svm.h>
#include <asm/swiotlb.h>
#include <asm/ultravisor.h>
+#include <asm/dtl.h>
static int __init init_svm(void)
{
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 7326f110d48c..f48a43b63d9e 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -10,6 +10,7 @@
#include <asm/sigp.h>
#include <asm/lowcore.h>
+#include <asm/processor.h>
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index e582fbe59e20..13a04fcf7762 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -24,7 +24,6 @@
#ifndef __ASSEMBLY__
#include <asm/lowcore.h>
#include <asm/page.h>
-#include <asm/processor.h>
#define STACK_INIT_OFFSET \
(THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
diff --git a/arch/sparc/include/asm/percpu_64.h b/arch/sparc/include/asm/percpu_64.h
index 32ef6f05cc56..a8786a4b90b6 100644
--- a/arch/sparc/include/asm/percpu_64.h
+++ b/arch/sparc/include/asm/percpu_64.h
@@ -4,7 +4,9 @@
#include <linux/compiler.h>
+#ifndef BUILD_VDSO
register unsigned long __local_per_cpu_offset asm("g5");
+#endif
#ifdef CONFIG_SMP
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index 0f6d0c4f6683..ace0d48e837e 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -2,6 +2,8 @@
#ifndef _SPARC_TRAP_BLOCK_H
#define _SPARC_TRAP_BLOCK_H
+#include <linux/threads.h>
+
#include <asm/hypervisor.h>
#include <asm/asi.h>
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index f09288431f28..54ad1890aefc 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -559,8 +559,7 @@ SYSCALL_DEFINE0(ni_syscall)
}
/**
- * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional
- * RCU handling
+ * idtentry_enter - Handle state tracking on ordinary idtentries
* @regs: Pointer to pt_regs of interrupted context
*
* Invokes:
@@ -572,6 +571,9 @@ SYSCALL_DEFINE0(ni_syscall)
* - The hardirq tracer to keep the state consistent as low level ASM
* entry disabled interrupts.
*
+ * As a precondition, this requires that the entry came from user mode,
+ * idle, or a kernel context in which RCU is watching.
+ *
* For kernel mode entries RCU handling is done conditional. If RCU is
* watching then the only RCU requirement is to check whether the tick has
* to be restarted. If RCU is not watching then rcu_irq_enter() has to be
@@ -585,18 +587,21 @@ SYSCALL_DEFINE0(ni_syscall)
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
* would not be possible.
*
- * Returns: True if RCU has been adjusted on a kernel entry
- * False otherwise
+ * Returns: An opaque object that must be passed to idtentry_exit()
*
- * The return value must be fed into the rcu_exit argument of
- * idtentry_exit_cond_rcu().
+ * The return value must be fed into the state argument of
+ * idtentry_exit().
*/
-bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
+noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs)
{
+ idtentry_state_t ret = {
+ .exit_rcu = false,
+ };
+
if (user_mode(regs)) {
check_user_regs(regs);
enter_from_user_mode();
- return false;
+ return ret;
}
/*
@@ -634,7 +639,8 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
trace_hardirqs_off_finish();
instrumentation_end();
- return true;
+ ret.exit_rcu = true;
+ return ret;
}
/*
@@ -649,7 +655,7 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
trace_hardirqs_off();
instrumentation_end();
- return false;
+ return ret;
}
static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
@@ -667,10 +673,9 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
}
/**
- * idtentry_exit_cond_rcu - Handle return from exception with conditional RCU
- * handling
+ * idtentry_exit - Handle return from exception that used idtentry_enter()
* @regs: Pointer to pt_regs (exception entry regs)
- * @rcu_exit: Invoke rcu_irq_exit() if true
+ * @state: Return value from matching call to idtentry_enter()
*
* Depending on the return target (kernel/user) this runs the necessary
* preemption and work checks if possible and reguired and returns to
@@ -679,10 +684,10 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
* This is the last action before returning to the low level ASM code which
* just needs to return to the appropriate context.
*
- * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
- * function must be fed into the @rcu_exit argument.
+ * Counterpart to idtentry_enter(). The return value of the entry
+ * function must be fed into the @state argument.
*/
-void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
+noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
{
lockdep_assert_irqs_disabled();
@@ -695,7 +700,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
* carefully and needs the same ordering of lockdep/tracing
* and RCU as the return to user mode path.
*/
- if (rcu_exit) {
+ if (state.exit_rcu) {
instrumentation_begin();
/* Tell the tracer that IRET will enable interrupts */
trace_hardirqs_on_prepare();
@@ -714,7 +719,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
* IRQ flags state is correct already. Just tell RCU if it
* was not watching on entry.
*/
- if (rcu_exit)
+ if (state.exit_rcu)
rcu_irq_exit();
}
}
@@ -726,7 +731,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
* Invokes enter_from_user_mode() to establish the proper context for
* NOHZ_FULL. Otherwise scheduling on exit would not be possible.
*/
-void noinstr idtentry_enter_user(struct pt_regs *regs)
+noinstr void idtentry_enter_user(struct pt_regs *regs)
{
check_user_regs(regs);
enter_from_user_mode();
@@ -744,13 +749,47 @@ void noinstr idtentry_enter_user(struct pt_regs *regs)
*
* Counterpart to idtentry_enter_user().
*/
-void noinstr idtentry_exit_user(struct pt_regs *regs)
+noinstr void idtentry_exit_user(struct pt_regs *regs)
{
lockdep_assert_irqs_disabled();
prepare_exit_to_usermode(regs);
}
+noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+{
+ bool irq_state = lockdep_hardirqs_enabled();
+
+ __nmi_enter();
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ lockdep_hardirq_enter();
+ rcu_nmi_enter();
+
+ instrumentation_begin();
+ trace_hardirqs_off_finish();
+ ftrace_nmi_enter();
+ instrumentation_end();
+
+ return irq_state;
+}
+
+noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+{
+ instrumentation_begin();
+ ftrace_nmi_exit();
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ }
+ instrumentation_end();
+
+ rcu_nmi_exit();
+ lockdep_hardirq_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ __nmi_exit();
+}
+
#ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
@@ -800,9 +839,10 @@ static void __xen_pv_evtchn_do_upcall(void)
__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
{
struct pt_regs *old_regs;
- bool inhcall, rcu_exit;
+ bool inhcall;
+ idtentry_state_t state;
- rcu_exit = idtentry_enter_cond_rcu(regs);
+ state = idtentry_enter(regs);
old_regs = set_irq_regs(regs);
instrumentation_begin();
@@ -812,13 +852,13 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
set_irq_regs(old_regs);
inhcall = get_and_clear_inhcall();
- if (inhcall && !WARN_ON_ONCE(rcu_exit)) {
+ if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
instrumentation_begin();
idtentry_exit_cond_resched(regs, true);
instrumentation_end();
restore_inhcall(inhcall);
} else {
- idtentry_exit_cond_rcu(regs, rcu_exit);
+ idtentry_exit(regs, state);
}
}
#endif /* CONFIG_XEN_PV */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 80d3b30d3ee3..d74128c964f8 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -13,8 +13,15 @@
void idtentry_enter_user(struct pt_regs *regs);
void idtentry_exit_user(struct pt_regs *regs);
-bool idtentry_enter_cond_rcu(struct pt_regs *regs);
-void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
+typedef struct idtentry_state {
+ bool exit_rcu;
+} idtentry_state_t;
+
+idtentry_state_t idtentry_enter(struct pt_regs *regs);
+void idtentry_exit(struct pt_regs *regs, idtentry_state_t state);
+
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
@@ -54,12 +61,12 @@ static __always_inline void __##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
__##func (regs); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs)
@@ -101,12 +108,12 @@ static __always_inline void __##func(struct pt_regs *regs, \
__visible noinstr void func(struct pt_regs *regs, \
unsigned long error_code) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
__##func (regs, error_code); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs, \
@@ -199,7 +206,7 @@ static __always_inline void __##func(struct pt_regs *regs, u8 vector); \
__visible noinstr void func(struct pt_regs *regs, \
unsigned long error_code) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
irq_enter_rcu(); \
@@ -207,7 +214,7 @@ __visible noinstr void func(struct pt_regs *regs, \
__##func (regs, (u8)error_code); \
irq_exit_rcu(); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs, u8 vector)
@@ -241,7 +248,7 @@ static void __##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
irq_enter_rcu(); \
@@ -249,7 +256,7 @@ __visible noinstr void func(struct pt_regs *regs) \
run_on_irqstack_cond(__##func, regs, regs); \
irq_exit_rcu(); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static noinline void __##func(struct pt_regs *regs)
@@ -270,7 +277,7 @@ static __always_inline void __##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
__irq_enter_raw(); \
@@ -278,7 +285,7 @@ __visible noinstr void func(struct pt_regs *regs) \
__##func (regs); \
__irq_exit_raw(); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index df63786e7bfa..3f78482d9496 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
u32 reason = kvm_read_and_reset_apf_flags();
- bool rcu_exit;
+ idtentry_state_t state;
switch (reason) {
case KVM_PV_REASON_PAGE_NOT_PRESENT:
@@ -243,7 +243,7 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
return false;
}
- rcu_exit = idtentry_enter_cond_rcu(regs);
+ state = idtentry_enter(regs);
instrumentation_begin();
/*
@@ -264,7 +264,7 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
}
instrumentation_end();
- idtentry_exit_cond_rcu(regs, rcu_exit);
+ idtentry_exit(regs, state);
return true;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index d7c5e44b26f7..4fc9954a9560 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
__this_cpu_write(last_nmi_rip, regs->ip);
instrumentation_begin();
- trace_hardirqs_off_finish();
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
unknown_nmi_error(reason, regs);
out:
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
}
@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi)
{
+ bool irq_state;
+
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
@@ -491,14 +490,14 @@ nmi_restart:
this_cpu_write(nmi_dr7, local_db_save());
- nmi_enter();
+ irq_state = idtentry_enter_nmi(regs);
inc_irq_stat(__nmi_count);
if (!ignore_nmis)
default_do_nmi(regs);
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
local_db_restore(this_cpu_read(nmi_dr7));
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b7cb3e0716f7..8493f55e1167 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -245,7 +245,7 @@ static noinstr bool handle_bug(struct pt_regs *regs)
DEFINE_IDTENTRY_RAW(exc_invalid_op)
{
- bool rcu_exit;
+ idtentry_state_t state;
/*
* We use UD2 as a short encoding for 'CALL __WARN', as such
@@ -255,11 +255,11 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op)
if (!user_mode(regs) && handle_bug(regs))
return;
- rcu_exit = idtentry_enter_cond_rcu(regs);
+ state = idtentry_enter(regs);
instrumentation_begin();
handle_invalid_op(regs);
instrumentation_end();
- idtentry_exit_cond_rcu(regs, rcu_exit);
+ idtentry_exit(regs, state);
}
DEFINE_IDTENTRY(exc_coproc_segment_overrun)
@@ -405,7 +405,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
}
#endif
- nmi_enter();
+ idtentry_enter_nmi(regs);
instrumentation_begin();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -651,15 +651,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
instrumentation_end();
idtentry_exit_user(regs);
} else {
- nmi_enter();
+ bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
- trace_hardirqs_off_finish();
if (!do_int3(regs))
die("int3", regs, 0);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
}
@@ -867,9 +864,8 @@ out:
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
- nmi_enter();
+ bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
- trace_hardirqs_off_finish();
/*
* If something gets miswired and we end up here for a user mode
@@ -886,10 +882,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
handle_debug(regs, dr6, false);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -905,6 +899,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
instrumentation_begin();
handle_debug(regs, dr6, true);
+
instrumentation_end();
idtentry_exit_user(regs);
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1ead568c0101..5e41949453cc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1377,7 +1377,7 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
{
unsigned long address = read_cr2();
- bool rcu_exit;
+ idtentry_state_t state;
prefetchw(&current->mm->mmap_lock);
@@ -1412,11 +1412,11 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
* code reenabled RCU to avoid subsequent wreckage which helps
* debugability.
*/
- rcu_exit = idtentry_enter_cond_rcu(regs);
+ state = idtentry_enter(regs);
instrumentation_begin();
handle_page_fault(regs, error_code, address);
instrumentation_end();
- idtentry_exit_cond_rcu(regs, rcu_exit);
+ idtentry_exit(regs, state);
}
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 03c9fece7d43..754f67ac4326 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
-#define nmi_enter() \
+#define __nmi_enter() \
do { \
+ lockdep_off(); \
arch_nmi_enter(); \
printk_nmi_enter(); \
- lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
- rcu_nmi_enter(); \
+ } while (0)
+
+#define nmi_enter() \
+ do { \
+ __nmi_enter(); \
lockdep_hardirq_enter(); \
+ rcu_nmi_enter(); \
instrumentation_begin(); \
ftrace_nmi_enter(); \
instrumentation_end(); \
} while (0)
+#define __nmi_exit() \
+ do { \
+ BUG_ON(!in_nmi()); \
+ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
+ printk_nmi_exit(); \
+ arch_nmi_exit(); \
+ lockdep_on(); \
+ } while (0)
+
#define nmi_exit() \
do { \
instrumentation_begin(); \
ftrace_nmi_exit(); \
instrumentation_end(); \
- lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
- BUG_ON(!in_nmi()); \
- __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
- lockdep_on(); \
- printk_nmi_exit(); \
- arch_nmi_exit(); \
+ lockdep_hardirq_exit(); \
+ __nmi_exit(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 6384d2813ded..5811ee8a5cd8 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -14,6 +14,7 @@
#include <linux/typecheck.h>
#include <asm/irqflags.h>
+#include <asm/percpu.h>
/* Currently lockdep_softirqs_on/off is used only by lockdep */
#ifdef CONFIG_PROVE_LOCKING
@@ -31,18 +32,22 @@
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
extern void trace_hardirqs_on_prepare(void);
extern void trace_hardirqs_off_finish(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p) ((p)->hardirq_context)
+# define lockdep_hardirq_context() (this_cpu_read(hardirq_context))
# define lockdep_softirq_context(p) ((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled))
# define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter() \
-do { \
- if (!current->hardirq_context++) \
- current->hardirq_threaded = 0; \
+# define lockdep_hardirq_enter() \
+do { \
+ if (this_cpu_inc_return(hardirq_context) == 1) \
+ current->hardirq_threaded = 0; \
} while (0)
# define lockdep_hardirq_threaded() \
do { \
@@ -50,7 +55,7 @@ do { \
} while (0)
# define lockdep_hardirq_exit() \
do { \
- current->hardirq_context--; \
+ this_cpu_dec(hardirq_context); \
} while (0)
# define lockdep_softirq_enter() \
do { \
@@ -104,9 +109,9 @@ do { \
# define trace_hardirqs_off_finish() do { } while (0)
# define trace_hardirqs_on() do { } while (0)
# define trace_hardirqs_off() do { } while (0)
-# define lockdep_hardirq_context(p) 0
+# define lockdep_hardirq_context() 0
# define lockdep_softirq_context(p) 0
-# define lockdep_hardirqs_enabled(p) 0
+# define lockdep_hardirqs_enabled() 0
# define lockdep_softirqs_enabled(p) 0
# define lockdep_hardirq_enter() do { } while (0)
# define lockdep_hardirq_threaded() do { } while (0)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 8fce5c98a4b0..7aafba0ddcf9 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -10,33 +10,15 @@
#ifndef __LINUX_LOCKDEP_H
#define __LINUX_LOCKDEP_H
+#include <linux/lockdep_types.h>
+#include <asm/percpu.h>
+
struct task_struct;
-struct lockdep_map;
/* for sysctl */
extern int prove_locking;
extern int lock_stat;
-#define MAX_LOCKDEP_SUBCLASSES 8UL
-
-#include <linux/types.h>
-
-enum lockdep_wait_type {
- LD_WAIT_INV = 0, /* not checked, catch all */
-
- LD_WAIT_FREE, /* wait free, rcu etc.. */
- LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
-
-#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
- LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
-#else
- LD_WAIT_CONFIG = LD_WAIT_SPIN,
-#endif
- LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
-
- LD_WAIT_MAX, /* must be last */
-};
-
#ifdef CONFIG_LOCKDEP
#include <linux/linkage.h>
@@ -44,147 +26,6 @@ enum lockdep_wait_type {
#include <linux/debug_locks.h>
#include <linux/stacktrace.h>
-/*
- * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
- * the total number of states... :-(
- */
-#define XXX_LOCK_USAGE_STATES (1+2*4)
-
-/*
- * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
- * cached in the instance of lockdep_map
- *
- * Currently main class (subclass == 0) and signle depth subclass
- * are cached in lockdep_map. This optimization is mainly targeting
- * on rq->lock. double_rq_lock() acquires this highly competitive with
- * single depth.
- */
-#define NR_LOCKDEP_CACHING_CLASSES 2
-
-/*
- * A lockdep key is associated with each lock object. For static locks we use
- * the lock address itself as the key. Dynamically allocated lock objects can
- * have a statically or dynamically allocated key. Dynamically allocated lock
- * keys must be registered before being used and must be unregistered before
- * the key memory is freed.
- */
-struct lockdep_subclass_key {
- char __one_byte;
-} __attribute__ ((__packed__));
-
-/* hash_entry is used to keep track of dynamically allocated keys. */
-struct lock_class_key {
- union {
- struct hlist_node hash_entry;
- struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
- };
-};
-
-extern struct lock_class_key __lockdep_no_validate__;
-
-struct lock_trace;
-
-#define LOCKSTAT_POINTS 4
-
-/*
- * The lock-class itself. The order of the structure members matters.
- * reinit_class() zeroes the key member and all subsequent members.
- */
-struct lock_class {
- /*
- * class-hash:
- */
- struct hlist_node hash_entry;
-
- /*
- * Entry in all_lock_classes when in use. Entry in free_lock_classes
- * when not in use. Instances that are being freed are on one of the
- * zapped_classes lists.
- */
- struct list_head lock_entry;
-
- /*
- * These fields represent a directed graph of lock dependencies,
- * to every node we attach a list of "forward" and a list of
- * "backward" graph nodes.
- */
- struct list_head locks_after, locks_before;
-
- const struct lockdep_subclass_key *key;
- unsigned int subclass;
- unsigned int dep_gen_id;
-
- /*
- * IRQ/softirq usage tracking bits:
- */
- unsigned long usage_mask;
- const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
-
- /*
- * Generation counter, when doing certain classes of graph walking,
- * to ensure that we check one node only once:
- */
- int name_version;
- const char *name;
-
- short wait_type_inner;
- short wait_type_outer;
-
-#ifdef CONFIG_LOCK_STAT
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
-#endif
-} __no_randomize_layout;
-
-#ifdef CONFIG_LOCK_STAT
-struct lock_time {
- s64 min;
- s64 max;
- s64 total;
- unsigned long nr;
-};
-
-enum bounce_type {
- bounce_acquired_write,
- bounce_acquired_read,
- bounce_contended_write,
- bounce_contended_read,
- nr_bounce_types,
-
- bounce_acquired = bounce_acquired_write,
- bounce_contended = bounce_contended_write,
-};
-
-struct lock_class_stats {
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
- struct lock_time read_waittime;
- struct lock_time write_waittime;
- struct lock_time read_holdtime;
- struct lock_time write_holdtime;
- unsigned long bounces[nr_bounce_types];
-};
-
-struct lock_class_stats lock_stats(struct lock_class *class);
-void clear_lock_stats(struct lock_class *class);
-#endif
-
-/*
- * Map the lock object (the lock instance) to the lock-class object.
- * This is embedded into specific lock instances:
- */
-struct lockdep_map {
- struct lock_class_key *key;
- struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
- const char *name;
- short wait_type_outer; /* can be taken in this context */
- short wait_type_inner; /* presents this context */
-#ifdef CONFIG_LOCK_STAT
- int cpu;
- unsigned long ip;
-#endif
-};
-
static inline void lockdep_copy_map(struct lockdep_map *to,
struct lockdep_map *from)
{
@@ -440,8 +281,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
-struct pin_cookie { unsigned int val; };
-
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
@@ -520,10 +359,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
# define lockdep_reset() do { debug_locks = 1; } while (0)
# define lockdep_free_key_range(start, size) do { } while (0)
# define lockdep_sys_exit() do { } while (0)
-/*
- * The class key takes no space if lockdep is disabled:
- */
-struct lock_class_key { };
static inline void lockdep_register_key(struct lock_class_key *key)
{
@@ -533,11 +368,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key)
{
}
-/*
- * The lockdep_map takes no space if lockdep is disabled:
- */
-struct lockdep_map { };
-
#define lockdep_depth(tsk) (0)
#define lockdep_is_held_type(l, r) (1)
@@ -549,8 +379,6 @@ struct lockdep_map { };
#define lockdep_recursing(tsk) (0)
-struct pin_cookie { };
-
#define NIL_COOKIE (struct pin_cookie){ }
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; })
@@ -703,28 +531,29 @@ do { \
lock_release(&(lock)->dep_map, _THIS_IP_); \
} while (0)
-#define lockdep_assert_irqs_enabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirqs_enabled, \
- "IRQs not enabled as expected\n"); \
- } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
-#define lockdep_assert_irqs_disabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirqs_enabled, \
- "IRQs not disabled as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_enabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
+} while (0)
-#define lockdep_assert_in_irq() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirq_context, \
- "Not in hardirq as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_disabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \
+} while (0)
+
+#define lockdep_assert_in_irq() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \
+} while (0)
#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define might_lock_nested(lock, subclass) do { } while (0)
+
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
@@ -734,7 +563,7 @@ do { \
# define lockdep_assert_RT_in_threaded_ctx() do { \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirq_context && \
+ lockdep_hardirq_context() && \
!(current->hardirq_threaded || current->irq_config), \
"Not in threaded context on PREEMPT_RT as expected\n"); \
} while (0)
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
new file mode 100644
index 000000000000..bb35b449f533
--- /dev/null
+++ b/include/linux/lockdep_types.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Runtime locking correctness validator
+ *
+ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * see Documentation/locking/lockdep-design.rst for more details.
+ */
+#ifndef __LINUX_LOCKDEP_TYPES_H
+#define __LINUX_LOCKDEP_TYPES_H
+
+#include <linux/types.h>
+
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+enum lockdep_wait_type {
+ LD_WAIT_INV = 0, /* not checked, catch all */
+
+ LD_WAIT_FREE, /* wait free, rcu etc.. */
+ LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
+
+#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
+ LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
+#else
+ LD_WAIT_CONFIG = LD_WAIT_SPIN,
+#endif
+ LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
+
+ LD_WAIT_MAX, /* must be last */
+};
+
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
+ * the total number of states... :-(
+ */
+#define XXX_LOCK_USAGE_STATES (1+2*4)
+
+/*
+ * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
+ * cached in the instance of lockdep_map
+ *
+ * Currently main class (subclass == 0) and signle depth subclass
+ * are cached in lockdep_map. This optimization is mainly targeting
+ * on rq->lock. double_rq_lock() acquires this highly competitive with
+ * single depth.
+ */
+#define NR_LOCKDEP_CACHING_CLASSES 2
+
+/*
+ * A lockdep key is associated with each lock object. For static locks we use
+ * the lock address itself as the key. Dynamically allocated lock objects can
+ * have a statically or dynamically allocated key. Dynamically allocated lock
+ * keys must be registered before being used and must be unregistered before
+ * the key memory is freed.
+ */
+struct lockdep_subclass_key {
+ char __one_byte;
+} __attribute__ ((__packed__));
+
+/* hash_entry is used to keep track of dynamically allocated keys. */
+struct lock_class_key {
+ union {
+ struct hlist_node hash_entry;
+ struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+ };
+};
+
+extern struct lock_class_key __lockdep_no_validate__;
+
+struct lock_trace;
+
+#define LOCKSTAT_POINTS 4
+
+/*
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
+ */
+struct lock_class {
+ /*
+ * class-hash:
+ */
+ struct hlist_node hash_entry;
+
+ /*
+ * Entry in all_lock_classes when in use. Entry in free_lock_classes
+ * when not in use. Instances that are being freed are on one of the
+ * zapped_classes lists.
+ */
+ struct list_head lock_entry;
+
+ /*
+ * These fields represent a directed graph of lock dependencies,
+ * to every node we attach a list of "forward" and a list of
+ * "backward" graph nodes.
+ */
+ struct list_head locks_after, locks_before;
+
+ const struct lockdep_subclass_key *key;
+ unsigned int subclass;
+ unsigned int dep_gen_id;
+
+ /*
+ * IRQ/softirq usage tracking bits:
+ */
+ unsigned long usage_mask;
+ const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
+
+ /*
+ * Generation counter, when doing certain classes of graph walking,
+ * to ensure that we check one node only once:
+ */
+ int name_version;
+ const char *name;
+
+ short wait_type_inner;
+ short wait_type_outer;
+
+#ifdef CONFIG_LOCK_STAT
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+#endif
+} __no_randomize_layout;
+
+#ifdef CONFIG_LOCK_STAT
+struct lock_time {
+ s64 min;
+ s64 max;
+ s64 total;
+ unsigned long nr;
+};
+
+enum bounce_type {
+ bounce_acquired_write,
+ bounce_acquired_read,
+ bounce_contended_write,
+ bounce_contended_read,
+ nr_bounce_types,
+
+ bounce_acquired = bounce_acquired_write,
+ bounce_contended = bounce_contended_write,
+};
+
+struct lock_class_stats {
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+ struct lock_time read_waittime;
+ struct lock_time write_waittime;
+ struct lock_time read_holdtime;
+ struct lock_time write_holdtime;
+ unsigned long bounces[nr_bounce_types];
+};
+
+struct lock_class_stats lock_stats(struct lock_class *class);
+void clear_lock_stats(struct lock_class *class);
+#endif
+
+/*
+ * Map the lock object (the lock instance) to the lock-class object.
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+ struct lock_class_key *key;
+ struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
+ const char *name;
+ short wait_type_outer; /* can be taken in this context */
+ short wait_type_inner; /* presents this context */
+#ifdef CONFIG_LOCK_STAT
+ int cpu;
+ unsigned long ip;
+#endif
+};
+
+struct pin_cookie { unsigned int val; };
+
+#else /* !CONFIG_LOCKDEP */
+
+/*
+ * The class key takes no space if lockdep is disabled:
+ */
+struct lock_class_key { };
+
+/*
+ * The lockdep_map takes no space if lockdep is disabled:
+ */
+struct lockdep_map { };
+
+struct pin_cookie { };
+
+#endif /* !LOCKDEP */
+
+#endif /* __LINUX_LOCKDEP_TYPES_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 7e5b2a4eb560..25e3fde85617 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
}
#define RWSEM_UNLOCKED_VALUE 0L
-#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
+#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
/* Common initializer macros and functions */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __RWSEM_DEP_MAP_INIT(lockname) \
- , .dep_map = { \
+ .dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_SLEEP, \
- }
+ },
#else
# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
#ifdef CONFIG_DEBUG_RWSEMS
-# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname
+# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname,
#else
-# define __DEBUG_RWSEM_INITIALIZER(lockname)
+# define __RWSEM_DEBUG_INIT(lockname)
#endif
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
+#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED,
#else
#define __RWSEM_OPT_INIT(lockname)
#endif
#define __RWSEM_INITIALIZER(name) \
- { __RWSEM_INIT_COUNT(name), \
+ { __RWSEM_COUNT_INIT(name), \
.owner = ATOMIC_LONG_INIT(0), \
- .wait_list = LIST_HEAD_INIT((name).wait_list), \
- .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
__RWSEM_OPT_INIT(name) \
- __DEBUG_RWSEM_INITIALIZER(name) \
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
+ .wait_list = LIST_HEAD_INIT((name).wait_list), \
+ __RWSEM_DEBUG_INIT(name) \
__RWSEM_DEP_MAP_INIT(name) }
#define DECLARE_RWSEM(name) \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 683372943093..8d1de021b315 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -986,8 +986,6 @@ struct task_struct {
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
- int hardirqs_enabled;
- int hardirq_context;
u64 hardirq_chain_key;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d3770b3f9d9a..f2f12d746dbd 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -56,6 +56,7 @@
#include <linux/kernel.h>
#include <linux/stringify.h>
#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
#include <asm/barrier.h>
#include <asm/mmiowb.h>
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 6102e6bff3ae..b981caafe8bf 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -15,7 +15,7 @@
# include <linux/spinlock_types_up.h>
#endif
-#include <linux/lockdep.h>
+#include <linux/lockdep_types.h>
typedef struct raw_spinlock {
arch_spinlock_t raw_lock;
diff --git a/kernel/fork.c b/kernel/fork.c
index efc5493203ae..70d9d0a4de2a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1954,8 +1954,8 @@ static __latent_entropy struct task_struct *copy_process(
rt_mutex_init_task(p);
+ lockdep_assert_irqs_enabled();
#ifdef CONFIG_PROVE_LOCKING
- DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
@@ -2036,7 +2036,6 @@ static __latent_entropy struct task_struct *copy_process(
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
p->irq_events = 0;
- p->hardirqs_enabled = 0;
p->hardirq_enable_ip = 0;
p->hardirq_enable_event = 0;
p->hardirq_disable_ip = _THIS_IP_;
@@ -2046,7 +2045,6 @@ static __latent_entropy struct task_struct *copy_process(
p->softirq_enable_event = 0;
p->softirq_disable_ip = 0;
p->softirq_disable_event = 0;
- p->hardirq_context = 0;
p->softirq_context = 0;
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index e646661f6282..4616d4ad609d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -32,30 +32,13 @@
* "But they come in a choice of three flavours!"
*/
#include <linux/compat.h>
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/fs.h>
-#include <linux/file.h>
#include <linux/jhash.h>
-#include <linux/init.h>
-#include <linux/futex.h>
-#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
-#include <linux/signal.h>
-#include <linux/export.h>
-#include <linux/magic.h>
-#include <linux/pid.h>
-#include <linux/nsproxy.h>
-#include <linux/ptrace.h>
-#include <linux/sched/rt.h>
-#include <linux/sched/wake_q.h>
-#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/freezer.h>
#include <linux/memblock.h>
#include <linux/fault-inject.h>
-#include <linux/refcount.h>
#include <asm/futex.h>
@@ -476,7 +459,7 @@ static u64 get_inode_sequence_number(struct inode *inode)
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
- * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
+ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
* @key: address where result is stored.
* @rw: mapping needs to be read/write (values: FUTEX_READ,
* FUTEX_WRITE)
@@ -500,8 +483,8 @@ static u64 get_inode_sequence_number(struct inode *inode)
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
-static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw)
+static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
+ enum futex_access rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -538,7 +521,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
again:
/* Ignore any VERIFY_READ mapping (futex common case) */
- if (unlikely(should_fail_futex(fshared)))
+ if (unlikely(should_fail_futex(true)))
return -EFAULT;
err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
@@ -626,7 +609,7 @@ again:
* A RO anonymous page will never change and thus doesn't make
* sense for futex operations.
*/
- if (unlikely(should_fail_futex(fshared)) || ro) {
+ if (unlikely(should_fail_futex(true)) || ro) {
err = -EFAULT;
goto out;
}
@@ -677,10 +660,6 @@ out:
return err;
}
-static inline void put_futex_key(union futex_key *key)
-{
-}
-
/**
* fault_in_user_writeable() - Fault in user address and verify RW access
* @uaddr: pointer to faulting user space address
@@ -1611,13 +1590,13 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
if (unlikely(ret != 0))
- goto out;
+ return ret;
hb = hash_futex(&key);
/* Make sure we really have tasks to wakeup */
if (!hb_waiters_pending(hb))
- goto out_put_key;
+ return ret;
spin_lock(&hb->lock);
@@ -1640,9 +1619,6 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
spin_unlock(&hb->lock);
wake_up_q(&wake_q);
-out_put_key:
- put_futex_key(&key);
-out:
return ret;
}
@@ -1709,10 +1685,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
retry:
ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
- goto out;
+ return ret;
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
if (unlikely(ret != 0))
- goto out_put_key1;
+ return ret;
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
@@ -1730,13 +1706,13 @@ retry_private:
* an MMU, but we might get them from range checking
*/
ret = op_ret;
- goto out_put_keys;
+ return ret;
}
if (op_ret == -EFAULT) {
ret = fault_in_user_writeable(uaddr2);
if (ret)
- goto out_put_keys;
+ return ret;
}
if (!(flags & FLAGS_SHARED)) {
@@ -1744,8 +1720,6 @@ retry_private:
goto retry_private;
}
- put_futex_key(&key2);
- put_futex_key(&key1);
cond_resched();
goto retry;
}
@@ -1781,11 +1755,6 @@ retry_private:
out_unlock:
double_unlock_hb(hb1, hb2);
wake_up_q(&wake_q);
-out_put_keys:
- put_futex_key(&key2);
-out_put_key1:
- put_futex_key(&key1);
-out:
return ret;
}
@@ -1992,20 +1961,18 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
retry:
ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
- goto out;
+ return ret;
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
requeue_pi ? FUTEX_WRITE : FUTEX_READ);
if (unlikely(ret != 0))
- goto out_put_key1;
+ return ret;
/*
* The check above which compares uaddrs is not sufficient for
* shared futexes. We need to compare the keys:
*/
- if (requeue_pi && match_futex(&key1, &key2)) {
- ret = -EINVAL;
- goto out_put_keys;
- }
+ if (requeue_pi && match_futex(&key1, &key2))
+ return -EINVAL;
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
@@ -2025,13 +1992,11 @@ retry_private:
ret = get_user(curval, uaddr1);
if (ret)
- goto out_put_keys;
+ return ret;
if (!(flags & FLAGS_SHARED))
goto retry_private;
- put_futex_key(&key2);
- put_futex_key(&key1);
goto retry;
}
if (curval != *cmpval) {
@@ -2090,12 +2055,10 @@ retry_private:
case -EFAULT:
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
- put_futex_key(&key2);
- put_futex_key(&key1);
ret = fault_in_user_writeable(uaddr2);
if (!ret)
goto retry;
- goto out;
+ return ret;
case -EBUSY:
case -EAGAIN:
/*
@@ -2106,8 +2069,6 @@ retry_private:
*/
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
- put_futex_key(&key2);
- put_futex_key(&key1);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -2216,12 +2177,6 @@ out_unlock:
double_unlock_hb(hb1, hb2);
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
-
-out_put_keys:
- put_futex_key(&key2);
-out_put_key1:
- put_futex_key(&key1);
-out:
return ret ? ret : task_count;
}
@@ -2567,7 +2522,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
*/
if (q->pi_state->owner != current)
ret = fixup_pi_state_owner(uaddr, q, current);
- goto out;
+ return ret ? ret : locked;
}
/*
@@ -2580,7 +2535,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
*/
if (q->pi_state->owner == current) {
ret = fixup_pi_state_owner(uaddr, q, NULL);
- goto out;
+ return ret;
}
/*
@@ -2594,8 +2549,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
q->pi_state->owner);
}
-out:
- return ret ? ret : locked;
+ return ret;
}
/**
@@ -2692,12 +2646,11 @@ retry_private:
ret = get_user(uval, uaddr);
if (ret)
- goto out;
+ return ret;
if (!(flags & FLAGS_SHARED))
goto retry_private;
- put_futex_key(&q->key);
goto retry;
}
@@ -2706,9 +2659,6 @@ retry_private:
ret = -EWOULDBLOCK;
}
-out:
- if (ret)
- put_futex_key(&q->key);
return ret;
}
@@ -2853,7 +2803,6 @@ retry_private:
* - EAGAIN: The user space value changed.
*/
queue_unlock(hb);
- put_futex_key(&q.key);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -2961,13 +2910,11 @@ no_block:
put_pi_state(pi_state);
}
- goto out_put_key;
+ goto out;
out_unlock_put_key:
queue_unlock(hb);
-out_put_key:
- put_futex_key(&q.key);
out:
if (to) {
hrtimer_cancel(&to->timer);
@@ -2980,12 +2927,11 @@ uaddr_faulted:
ret = fault_in_user_writeable(uaddr);
if (ret)
- goto out_put_key;
+ goto out;
if (!(flags & FLAGS_SHARED))
goto retry_private;
- put_futex_key(&q.key);
goto retry;
}
@@ -3114,16 +3060,13 @@ retry:
out_unlock:
spin_unlock(&hb->lock);
out_putkey:
- put_futex_key(&key);
return ret;
pi_retry:
- put_futex_key(&key);
cond_resched();
goto retry;
pi_faulted:
- put_futex_key(&key);
ret = fault_in_user_writeable(uaddr);
if (!ret)
@@ -3265,7 +3208,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
if (ret)
- goto out_key2;
+ goto out;
/*
* The check above which compares uaddrs is not sufficient for
@@ -3274,7 +3217,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (match_futex(&q.key, &key2)) {
queue_unlock(hb);
ret = -EINVAL;
- goto out_put_keys;
+ goto out;
}
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
@@ -3284,7 +3227,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
spin_unlock(&hb->lock);
if (ret)
- goto out_put_keys;
+ goto out;
/*
* In order for us to be here, we know our q.key == key2, and since
@@ -3374,11 +3317,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
ret = -EWOULDBLOCK;
}
-out_put_keys:
- put_futex_key(&q.key);
-out_key2:
- put_futex_key(&key2);
-
out:
if (to) {
hrtimer_cancel(&to->timer);
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 15f67949d11e..732623c30359 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
}
if (!kcsan_interrupt_watcher)
- /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
- raw_local_irq_save(irq_flags);
+ local_irq_save(irq_flags);
watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
if (!kcsan_interrupt_watcher)
- raw_local_irq_restore(irq_flags);
+ local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..6b2fb1a6d8cd 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -606,10 +606,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
goto out;
/*
- * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
- * we do not turn off lockdep here; this could happen due to recursion
- * into lockdep via KCSAN if we detect a race in utilities used by
- * lockdep.
+ * Because we may generate reports when we're in scheduler code, the use
+ * of printk() could deadlock. Until such time that all printing code
+ * called in print_report() is scheduler-safe, accept the risk, and just
+ * get our message out. As such, also disable lockdep to hide the
+ * warning, and avoid disabling lockdep for the rest of the kernel.
*/
lockdep_off();
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 29a8de4c50b9..c9ea05edce25 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -395,7 +395,7 @@ void lockdep_init_task(struct task_struct *task)
static __always_inline void lockdep_recursion_finish(void)
{
- if (WARN_ON_ONCE(--current->lockdep_recursion))
+ if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK))
current->lockdep_recursion = 0;
}
@@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr,
pr_warn("-----------------------------------------------------\n");
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
- curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
+ lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
- curr->hardirqs_enabled,
+ lockdep_hardirqs_enabled(),
curr->softirqs_enabled);
print_lock(next);
@@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
curr->comm, task_pid_nr(curr),
- lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+ lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
- lockdep_hardirqs_enabled(curr),
+ lockdep_hardirqs_enabled(),
lockdep_softirqs_enabled(curr));
print_lock(this);
@@ -3646,10 +3646,19 @@ static void __trace_hardirqs_on_caller(void)
*/
void lockdep_hardirqs_on_prepare(unsigned long ip)
{
- if (unlikely(!debug_locks || current->lockdep_recursion))
+ if (unlikely(!debug_locks))
+ return;
+
+ /*
+ * NMIs do not (and cannot) track lock dependencies, nothing to do.
+ */
+ if (unlikely(in_nmi()))
return;
- if (unlikely(current->hardirqs_enabled)) {
+ if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
+ return;
+
+ if (unlikely(lockdep_hardirqs_enabled())) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3677,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
* Can't allow enabling interrupts while in an interrupt handler,
* that's general bad form and such. Recursion, limited stack etc..
*/
- if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context()))
return;
current->hardirq_chain_key = current->curr_chain_key;
@@ -3692,10 +3701,30 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
{
struct task_struct *curr = current;
- if (unlikely(!debug_locks || curr->lockdep_recursion))
+ if (unlikely(!debug_locks))
return;
- if (curr->hardirqs_enabled) {
+ /*
+ * NMIs can happen in the middle of local_irq_{en,dis}able() where the
+ * tracking state and hardware state are out of sync.
+ *
+ * NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from,
+ * and not rely on hardware state like normal interrupts.
+ */
+ if (unlikely(in_nmi())) {
+ /*
+ * Skip:
+ * - recursion check, because NMI can hit lockdep;
+ * - hardware state check, because above;
+ * - chain_key check, see lockdep_hardirqs_on_prepare().
+ */
+ goto skip_checks;
+ }
+
+ if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
+ return;
+
+ if (lockdep_hardirqs_enabled()) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3720,8 +3749,9 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
current->curr_chain_key);
+skip_checks:
/* we'll do an OFF -> ON transition: */
- curr->hardirqs_enabled = 1;
+ this_cpu_write(hardirqs_enabled, 1);
curr->hardirq_enable_ip = ip;
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_on_events);
@@ -3735,7 +3765,15 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
{
struct task_struct *curr = current;
- if (unlikely(!debug_locks || curr->lockdep_recursion))
+ if (unlikely(!debug_locks))
+ return;
+
+ /*
+ * Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep;
+ * they will restore the software state. This ensures the software
+ * state is consistent inside NMIs as well.
+ */
+ if (unlikely(!in_nmi() && (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)))
return;
/*
@@ -3745,11 +3783,11 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
- if (curr->hardirqs_enabled) {
+ if (lockdep_hardirqs_enabled()) {
/*
* We have done an ON -> OFF transition:
*/
- curr->hardirqs_enabled = 0;
+ this_cpu_write(hardirqs_enabled, 0);
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_off_events);
@@ -3794,7 +3832,7 @@ void lockdep_softirqs_on(unsigned long ip)
* usage bit for all held locks, if hardirqs are
* enabled too:
*/
- if (curr->hardirqs_enabled)
+ if (lockdep_hardirqs_enabled())
mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
lockdep_recursion_finish();
}
@@ -3843,7 +3881,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
*/
if (!hlock->trylock) {
if (hlock->read) {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context())
if (!mark_lock(curr, hlock,
LOCK_USED_IN_HARDIRQ_READ))
return 0;
@@ -3852,7 +3890,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
LOCK_USED_IN_SOFTIRQ_READ))
return 0;
} else {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context())
if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
return 0;
if (curr->softirq_context)
@@ -3890,7 +3928,7 @@ lock_used:
static inline unsigned int task_irq_context(struct task_struct *task)
{
- return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context +
+ return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context() +
LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context;
}
@@ -3983,7 +4021,7 @@ static inline short task_wait_context(struct task_struct *curr)
* Set appropriate wait type for the context; for IRQs we have to take
* into account force_irqthread as that is implied by PREEMPT_RT.
*/
- if (curr->hardirq_context) {
+ if (lockdep_hardirq_context()) {
/*
* Check if force_irqthreads will run us threaded.
*/
@@ -4826,11 +4864,11 @@ static void check_flags(unsigned long flags)
return;
if (irqs_disabled_flags(flags)) {
- if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) {
printk("possible reason: unannotated irqs-off.\n");
}
} else {
- if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled())) {
printk("possible reason: unannotated irqs-on.\n");
}
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c4201b7f42b1..5e9aaa648a74 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -107,6 +107,12 @@ static bool ksoftirqd_running(unsigned long pending)
* where hardirqs are disabled legitimately:
*/
#ifdef CONFIG_TRACE_IRQFLAGS
+
+DEFINE_PER_CPU(int, hardirqs_enabled);
+DEFINE_PER_CPU(int, hardirq_context);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
+
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
unsigned long flags;
@@ -224,7 +230,7 @@ static inline bool lockdep_softirq_start(void)
{
bool in_hardirq = false;
- if (lockdep_hardirq_context(current)) {
+ if (lockdep_hardirq_context()) {
in_hardirq = true;
lockdep_hardirq_exit();
}
diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h
index 67e01bbadbfe..501262aee8ff 100644
--- a/tools/include/linux/irqflags.h
+++ b/tools/include/linux/irqflags.h
@@ -2,9 +2,9 @@
#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
-# define lockdep_hardirq_context(p) 0
+# define lockdep_hardirq_context() 0
# define lockdep_softirq_context(p) 0
-# define lockdep_hardirqs_enabled(p) 0
+# define lockdep_hardirqs_enabled() 0
# define lockdep_softirqs_enabled(p) 0
# define lockdep_hardirq_enter() do { } while (0)
# define lockdep_hardirq_exit() do { } while (0)