summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 14:39:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 14:39:35 -0700
commit9ba19ccd2d283a79dd29e8130819c59beca80f62 (patch)
tree8dece4ee137bc9f7a3e5f31ce5613f9b82b7d260 /kernel
parent8f0cb6660acb0d4756df880a3e60e73daa9c244e (diff)
parent992414a18cd4de05fa3f8ff7e1c29af758bdee1a (diff)
Merge tag 'locking-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: - LKMM updates: mostly documentation changes, but also some new litmus tests for atomic ops. - KCSAN updates: the most important change is that GCC 11 now has all fixes in place to support KCSAN, so GCC support can be enabled again. Also more annotations. - futex updates: minor cleanups and simplifications - seqlock updates: merge preparatory changes/cleanups for the 'associated locks' facilities. - lockdep updates: - simplify IRQ trace event handling - add various new debug checks - simplify header dependencies, split out <linux/lockdep_types.h>, decouple lockdep from other low level headers some more - fix NMI handling - misc cleanups and smaller fixes * tag 'locking-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits) kcsan: Improve IRQ state trace reporting lockdep: Refactor IRQ trace events fields into struct seqlock: lockdep assert non-preemptibility on seqcount_t write lockdep: Add preemption enabled/disabled assertion APIs seqlock: Implement raw_seqcount_begin() in terms of raw_read_seqcount() seqlock: Add kernel-doc for seqcount_t and seqlock_t APIs seqlock: Reorder seqcount_t and seqlock_t API definitions seqlock: seqcount_t latch: End read sections with read_seqcount_retry() seqlock: Properly format kernel-doc code samples Documentation: locking: Describe seqlock design and usage locking/qspinlock: Do not include atomic.h from qspinlock_types.h locking/atomic: Move ATOMIC_INIT into linux/types.h lockdep: Move list.h inclusion into lockdep.h locking/lockdep: Fix TRACE_IRQFLAGS vs. NMIs futex: Remove unused or redundant includes futex: Consistently use fshared as boolean futex: Remove needless goto's futex: Remove put_futex_key() rwsem: fix commas in initialisation docs: locking: Replace HTTP links with HTTPS ones ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c28
-rw-r--r--kernel/futex.c114
-rw-r--r--kernel/kcsan/Makefile9
-rw-r--r--kernel/kcsan/atomic.h6
-rw-r--r--kernel/kcsan/core.c37
-rw-r--r--kernel/kcsan/kcsan-test.c1107
-rw-r--r--kernel/kcsan/kcsan.h7
-rw-r--r--kernel/kcsan/report.c12
-rw-r--r--kernel/kcsan/selftest.c (renamed from kernel/kcsan/test.c)0
-rw-r--r--kernel/locking/lockdep.c142
-rw-r--r--kernel/locking/osq_lock.c6
-rw-r--r--kernel/softirq.c8
12 files changed, 1309 insertions, 167 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index efc5493203ae..0cc3d9cd6cc2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -359,7 +359,13 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (new) {
- *new = *orig;
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+ /*
+ * orig->shared.rb may be modified concurrently, but the clone
+ * will be reinitialized.
+ */
+ *new = data_race(*orig);
INIT_LIST_HEAD(&new->anon_vma_chain);
new->vm_next = new->vm_prev = NULL;
}
@@ -1954,8 +1960,8 @@ static __latent_entropy struct task_struct *copy_process(
rt_mutex_init_task(p);
+ lockdep_assert_irqs_enabled();
#ifdef CONFIG_PROVE_LOCKING
- DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
@@ -2035,19 +2041,11 @@ static __latent_entropy struct task_struct *copy_process(
seqcount_init(&p->mems_allowed_seq);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
- p->irq_events = 0;
- p->hardirqs_enabled = 0;
- p->hardirq_enable_ip = 0;
- p->hardirq_enable_event = 0;
- p->hardirq_disable_ip = _THIS_IP_;
- p->hardirq_disable_event = 0;
- p->softirqs_enabled = 1;
- p->softirq_enable_ip = _THIS_IP_;
- p->softirq_enable_event = 0;
- p->softirq_disable_ip = 0;
- p->softirq_disable_event = 0;
- p->hardirq_context = 0;
- p->softirq_context = 0;
+ memset(&p->irqtrace, 0, sizeof(p->irqtrace));
+ p->irqtrace.hardirq_disable_ip = _THIS_IP_;
+ p->irqtrace.softirq_enable_ip = _THIS_IP_;
+ p->softirqs_enabled = 1;
+ p->softirq_context = 0;
#endif
p->pagefault_disabled = 0;
diff --git a/kernel/futex.c b/kernel/futex.c
index e646661f6282..4616d4ad609d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -32,30 +32,13 @@
* "But they come in a choice of three flavours!"
*/
#include <linux/compat.h>
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/fs.h>
-#include <linux/file.h>
#include <linux/jhash.h>
-#include <linux/init.h>
-#include <linux/futex.h>
-#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
-#include <linux/signal.h>
-#include <linux/export.h>
-#include <linux/magic.h>
-#include <linux/pid.h>
-#include <linux/nsproxy.h>
-#include <linux/ptrace.h>
-#include <linux/sched/rt.h>
-#include <linux/sched/wake_q.h>
-#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/freezer.h>
#include <linux/memblock.h>
#include <linux/fault-inject.h>
-#include <linux/refcount.h>
#include <asm/futex.h>
@@ -476,7 +459,7 @@ static u64 get_inode_sequence_number(struct inode *inode)
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
- * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
+ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
* @key: address where result is stored.
* @rw: mapping needs to be read/write (values: FUTEX_READ,
* FUTEX_WRITE)
@@ -500,8 +483,8 @@ static u64 get_inode_sequence_number(struct inode *inode)
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
-static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw)
+static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
+ enum futex_access rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -538,7 +521,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
again:
/* Ignore any VERIFY_READ mapping (futex common case) */
- if (unlikely(should_fail_futex(fshared)))
+ if (unlikely(should_fail_futex(true)))
return -EFAULT;
err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
@@ -626,7 +609,7 @@ again:
* A RO anonymous page will never change and thus doesn't make
* sense for futex operations.
*/
- if (unlikely(should_fail_futex(fshared)) || ro) {
+ if (unlikely(should_fail_futex(true)) || ro) {
err = -EFAULT;
goto out;
}
@@ -677,10 +660,6 @@ out:
return err;
}
-static inline void put_futex_key(union futex_key *key)
-{
-}
-
/**
* fault_in_user_writeable() - Fault in user address and verify RW access
* @uaddr: pointer to faulting user space address
@@ -1611,13 +1590,13 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
if (unlikely(ret != 0))
- goto out;
+ return ret;
hb = hash_futex(&key);
/* Make sure we really have tasks to wakeup */
if (!hb_waiters_pending(hb))
- goto out_put_key;
+ return ret;
spin_lock(&hb->lock);
@@ -1640,9 +1619,6 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
spin_unlock(&hb->lock);
wake_up_q(&wake_q);
-out_put_key:
- put_futex_key(&key);
-out:
return ret;
}
@@ -1709,10 +1685,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
retry:
ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
- goto out;
+ return ret;
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
if (unlikely(ret != 0))
- goto out_put_key1;
+ return ret;
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
@@ -1730,13 +1706,13 @@ retry_private:
* an MMU, but we might get them from range checking
*/
ret = op_ret;
- goto out_put_keys;
+ return ret;
}
if (op_ret == -EFAULT) {
ret = fault_in_user_writeable(uaddr2);
if (ret)
- goto out_put_keys;
+ return ret;
}
if (!(flags & FLAGS_SHARED)) {
@@ -1744,8 +1720,6 @@ retry_private:
goto retry_private;
}
- put_futex_key(&key2);
- put_futex_key(&key1);
cond_resched();
goto retry;
}
@@ -1781,11 +1755,6 @@ retry_private:
out_unlock:
double_unlock_hb(hb1, hb2);
wake_up_q(&wake_q);
-out_put_keys:
- put_futex_key(&key2);
-out_put_key1:
- put_futex_key(&key1);
-out:
return ret;
}
@@ -1992,20 +1961,18 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
retry:
ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
- goto out;
+ return ret;
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
requeue_pi ? FUTEX_WRITE : FUTEX_READ);
if (unlikely(ret != 0))
- goto out_put_key1;
+ return ret;
/*
* The check above which compares uaddrs is not sufficient for
* shared futexes. We need to compare the keys:
*/
- if (requeue_pi && match_futex(&key1, &key2)) {
- ret = -EINVAL;
- goto out_put_keys;
- }
+ if (requeue_pi && match_futex(&key1, &key2))
+ return -EINVAL;
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
@@ -2025,13 +1992,11 @@ retry_private:
ret = get_user(curval, uaddr1);
if (ret)
- goto out_put_keys;
+ return ret;
if (!(flags & FLAGS_SHARED))
goto retry_private;
- put_futex_key(&key2);
- put_futex_key(&key1);
goto retry;
}
if (curval != *cmpval) {
@@ -2090,12 +2055,10 @@ retry_private:
case -EFAULT:
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
- put_futex_key(&key2);
- put_futex_key(&key1);
ret = fault_in_user_writeable(uaddr2);
if (!ret)
goto retry;
- goto out;
+ return ret;
case -EBUSY:
case -EAGAIN:
/*
@@ -2106,8 +2069,6 @@ retry_private:
*/
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
- put_futex_key(&key2);
- put_futex_key(&key1);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -2216,12 +2177,6 @@ out_unlock:
double_unlock_hb(hb1, hb2);
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
-
-out_put_keys:
- put_futex_key(&key2);
-out_put_key1:
- put_futex_key(&key1);
-out:
return ret ? ret : task_count;
}
@@ -2567,7 +2522,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
*/
if (q->pi_state->owner != current)
ret = fixup_pi_state_owner(uaddr, q, current);
- goto out;
+ return ret ? ret : locked;
}
/*
@@ -2580,7 +2535,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
*/
if (q->pi_state->owner == current) {
ret = fixup_pi_state_owner(uaddr, q, NULL);
- goto out;
+ return ret;
}
/*
@@ -2594,8 +2549,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
q->pi_state->owner);
}
-out:
- return ret ? ret : locked;
+ return ret;
}
/**
@@ -2692,12 +2646,11 @@ retry_private:
ret = get_user(uval, uaddr);
if (ret)
- goto out;
+ return ret;
if (!(flags & FLAGS_SHARED))
goto retry_private;
- put_futex_key(&q->key);
goto retry;
}
@@ -2706,9 +2659,6 @@ retry_private:
ret = -EWOULDBLOCK;
}
-out:
- if (ret)
- put_futex_key(&q->key);
return ret;
}
@@ -2853,7 +2803,6 @@ retry_private:
* - EAGAIN: The user space value changed.
*/
queue_unlock(hb);
- put_futex_key(&q.key);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -2961,13 +2910,11 @@ no_block:
put_pi_state(pi_state);
}
- goto out_put_key;
+ goto out;
out_unlock_put_key:
queue_unlock(hb);
-out_put_key:
- put_futex_key(&q.key);
out:
if (to) {
hrtimer_cancel(&to->timer);
@@ -2980,12 +2927,11 @@ uaddr_faulted:
ret = fault_in_user_writeable(uaddr);
if (ret)
- goto out_put_key;
+ goto out;
if (!(flags & FLAGS_SHARED))
goto retry_private;
- put_futex_key(&q.key);
goto retry;
}
@@ -3114,16 +3060,13 @@ retry:
out_unlock:
spin_unlock(&hb->lock);
out_putkey:
- put_futex_key(&key);
return ret;
pi_retry:
- put_futex_key(&key);
cond_resched();
goto retry;
pi_faulted:
- put_futex_key(&key);
ret = fault_in_user_writeable(uaddr);
if (!ret)
@@ -3265,7 +3208,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
if (ret)
- goto out_key2;
+ goto out;
/*
* The check above which compares uaddrs is not sufficient for
@@ -3274,7 +3217,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (match_futex(&q.key, &key2)) {
queue_unlock(hb);
ret = -EINVAL;
- goto out_put_keys;
+ goto out;
}
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
@@ -3284,7 +3227,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
spin_unlock(&hb->lock);
if (ret)
- goto out_put_keys;
+ goto out;
/*
* In order for us to be here, we know our q.key == key2, and since
@@ -3374,11 +3317,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
ret = -EWOULDBLOCK;
}
-out_put_keys:
- put_futex_key(&q.key);
-out_key2:
- put_futex_key(&key2);
-
out:
if (to) {
hrtimer_cancel(&to->timer);
diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
index d4999b38d1be..65ca5539c470 100644
--- a/kernel/kcsan/Makefile
+++ b/kernel/kcsan/Makefile
@@ -7,8 +7,11 @@ CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
-CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \
- $(call cc-option,-fno-stack-protector,)
+CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \
+ -fno-stack-protector -DDISABLE_BRANCH_PROFILING
obj-y := core.o debugfs.o report.o
-obj-$(CONFIG_KCSAN_SELFTEST) += test.o
+obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
+
+CFLAGS_kcsan-test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
+obj-$(CONFIG_KCSAN_TEST) += kcsan-test.o
diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
index be9e625227f3..75fe701f4127 100644
--- a/kernel/kcsan/atomic.h
+++ b/kernel/kcsan/atomic.h
@@ -3,8 +3,7 @@
#ifndef _KERNEL_KCSAN_ATOMIC_H
#define _KERNEL_KCSAN_ATOMIC_H
-#include <linux/jiffies.h>
-#include <linux/sched.h>
+#include <linux/types.h>
/*
* Special rules for certain memory where concurrent conflicting accesses are
@@ -13,8 +12,7 @@
*/
static bool kcsan_is_atomic_special(const volatile void *ptr)
{
- /* volatile globals that have been observed in data races. */
- return ptr == &jiffies || ptr == &current->state;
+ return false;
}
#endif /* _KERNEL_KCSAN_ATOMIC_H */
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 15f67949d11e..9147ff6a12e5 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -291,6 +291,20 @@ static inline unsigned int get_delay(void)
0);
}
+void kcsan_save_irqtrace(struct task_struct *task)
+{
+#ifdef CONFIG_TRACE_IRQFLAGS
+ task->kcsan_save_irqtrace = task->irqtrace;
+#endif
+}
+
+void kcsan_restore_irqtrace(struct task_struct *task)
+{
+#ifdef CONFIG_TRACE_IRQFLAGS
+ task->irqtrace = task->kcsan_save_irqtrace;
+#endif
+}
+
/*
* Pull everything together: check_access() below contains the performance
* critical operations; the fast-path (including check_access) functions should
@@ -336,9 +350,11 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
flags = user_access_save();
if (consumed) {
+ kcsan_save_irqtrace(current);
kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE,
KCSAN_REPORT_CONSUMED_WATCHPOINT,
watchpoint - watchpoints);
+ kcsan_restore_irqtrace(current);
} else {
/*
* The other thread may not print any diagnostics, as it has
@@ -396,9 +412,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
goto out;
}
+ /*
+ * Save and restore the IRQ state trace touched by KCSAN, since KCSAN's
+ * runtime is entered for every memory access, and potentially useful
+ * information is lost if dirtied by KCSAN.
+ */
+ kcsan_save_irqtrace(current);
if (!kcsan_interrupt_watcher)
- /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
- raw_local_irq_save(irq_flags);
+ local_irq_save(irq_flags);
watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -539,7 +560,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
if (!kcsan_interrupt_watcher)
- raw_local_irq_restore(irq_flags);
+ local_irq_restore(irq_flags);
+ kcsan_restore_irqtrace(current);
out:
user_access_restore(ua_flags);
}
@@ -754,6 +776,7 @@ EXPORT_SYMBOL(__kcsan_check_access);
*/
#define DEFINE_TSAN_READ_WRITE(size) \
+ void __tsan_read##size(void *ptr); \
void __tsan_read##size(void *ptr) \
{ \
check_access(ptr, size, 0); \
@@ -762,6 +785,7 @@ EXPORT_SYMBOL(__kcsan_check_access);
void __tsan_unaligned_read##size(void *ptr) \
__alias(__tsan_read##size); \
EXPORT_SYMBOL(__tsan_unaligned_read##size); \
+ void __tsan_write##size(void *ptr); \
void __tsan_write##size(void *ptr) \
{ \
check_access(ptr, size, KCSAN_ACCESS_WRITE); \
@@ -777,12 +801,14 @@ DEFINE_TSAN_READ_WRITE(4);
DEFINE_TSAN_READ_WRITE(8);
DEFINE_TSAN_READ_WRITE(16);
+void __tsan_read_range(void *ptr, size_t size);
void __tsan_read_range(void *ptr, size_t size)
{
check_access(ptr, size, 0);
}
EXPORT_SYMBOL(__tsan_read_range);
+void __tsan_write_range(void *ptr, size_t size);
void __tsan_write_range(void *ptr, size_t size)
{
check_access(ptr, size, KCSAN_ACCESS_WRITE);
@@ -799,6 +825,7 @@ EXPORT_SYMBOL(__tsan_write_range);
* the size-check of compiletime_assert_rwonce_type().
*/
#define DEFINE_TSAN_VOLATILE_READ_WRITE(size) \
+ void __tsan_volatile_read##size(void *ptr); \
void __tsan_volatile_read##size(void *ptr) \
{ \
const bool is_atomic = size <= sizeof(long long) && \
@@ -811,6 +838,7 @@ EXPORT_SYMBOL(__tsan_write_range);
void __tsan_unaligned_volatile_read##size(void *ptr) \
__alias(__tsan_volatile_read##size); \
EXPORT_SYMBOL(__tsan_unaligned_volatile_read##size); \
+ void __tsan_volatile_write##size(void *ptr); \
void __tsan_volatile_write##size(void *ptr) \
{ \
const bool is_atomic = size <= sizeof(long long) && \
@@ -836,14 +864,17 @@ DEFINE_TSAN_VOLATILE_READ_WRITE(16);
* The below are not required by KCSAN, but can still be emitted by the
* compiler.
*/
+void __tsan_func_entry(void *call_pc);
void __tsan_func_entry(void *call_pc)
{
}
EXPORT_SYMBOL(__tsan_func_entry);
+void __tsan_func_exit(void);
void __tsan_func_exit(void)
{
}
EXPORT_SYMBOL(__tsan_func_exit);
+void __tsan_init(void);
void __tsan_init(void)
{
}
diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c
new file mode 100644
index 000000000000..fed6fcb5768c
--- /dev/null
+++ b/kernel/kcsan/kcsan-test.c
@@ -0,0 +1,1107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KCSAN test with various race scenarious to test runtime behaviour. Since the
+ * interface with which KCSAN's reports are obtained is via the console, this is
+ * the output we should verify. For each test case checks the presence (or
+ * absence) of generated reports. Relies on 'console' tracepoint to capture
+ * reports as they appear in the kernel log.
+ *
+ * Makes use of KUnit for test organization, and the Torture framework for test
+ * thread control.
+ *
+ * Copyright (C) 2020, Google LLC.
+ * Author: Marco Elver <elver@google.com>
+ */
+
+#include <kunit/test.h>
+#include <linux/jiffies.h>
+#include <linux/kcsan-checks.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/torture.h>
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+#include <trace/events/printk.h>
+
+/* Points to current test-case memory access "kernels". */
+static void (*access_kernels[2])(void);
+
+static struct task_struct **threads; /* Lists of threads. */
+static unsigned long end_time; /* End time of test. */
+
+/* Report as observed from console. */
+static struct {
+ spinlock_t lock;
+ int nlines;
+ char lines[3][512];
+} observed = {
+ .lock = __SPIN_LOCK_UNLOCKED(observed.lock),
+};
+
+/* Setup test checking loop. */
+static __no_kcsan inline void
+begin_test_checks(void (*func1)(void), void (*func2)(void))
+{
+ kcsan_disable_current();
+
+ /*
+ * Require at least as long as KCSAN_REPORT_ONCE_IN_MS, to ensure at
+ * least one race is reported.
+ */
+ end_time = jiffies + msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS + 500);
+
+ /* Signal start; release potential initialization of shared data. */
+ smp_store_release(&access_kernels[0], func1);
+ smp_store_release(&access_kernels[1], func2);
+}
+
+/* End test checking loop. */
+static __no_kcsan inline bool
+end_test_checks(bool stop)
+{
+ if (!stop && time_before(jiffies, end_time)) {
+ /* Continue checking */
+ might_sleep();
+ return false;
+ }
+
+ kcsan_enable_current();
+ return true;
+}
+
+/*
+ * Probe for console output: checks if a race was reported, and obtains observed
+ * lines of interest.
+ */
+__no_kcsan
+static void probe_console(void *ignore, const char *buf, size_t len)
+{
+ unsigned long flags;
+ int nlines;
+
+ /*
+ * Note that KCSAN reports under a global lock, so we do not risk the
+ * possibility of having multiple reports interleaved. If that were the
+ * case, we'd expect tests to fail.
+ */
+
+ spin_lock_irqsave(&observed.lock, flags);
+ nlines = observed.nlines;
+
+ if (strnstr(buf, "BUG: KCSAN: ", len) && strnstr(buf, "test_", len)) {
+ /*
+ * KCSAN report and related to the test.
+ *
+ * The provided @buf is not NUL-terminated; copy no more than
+ * @len bytes and let strscpy() add the missing NUL-terminator.
+ */
+ strscpy(observed.lines[0], buf, min(len + 1, sizeof(observed.lines[0])));
+ nlines = 1;
+ } else if ((nlines == 1 || nlines == 2) && strnstr(buf, "bytes by", len)) {
+ strscpy(observed.lines[nlines++], buf, min(len + 1, sizeof(observed.lines[0])));
+
+ if (strnstr(buf, "race at unknown origin", len)) {
+ if (WARN_ON(nlines != 2))
+ goto out;
+
+ /* No second line of interest. */
+ strcpy(observed.lines[nlines++], "<none>");
+ }
+ }
+
+out:
+ WRITE_ONCE(observed.nlines, nlines); /* Publish new nlines. */
+ spin_unlock_irqrestore(&observed.lock, flags);
+}
+
+/* Check if a report related to the test exists. */
+__no_kcsan
+static bool report_available(void)
+{
+ return READ_ONCE(observed.nlines) == ARRAY_SIZE(observed.lines);
+}
+
+/* Report information we expect in a report. */
+struct expect_report {
+ /* Access information of both accesses. */
+ struct {
+ void *fn; /* Function pointer to expected function of top frame. */
+ void *addr; /* Address of access; unchecked if NULL. */
+ size_t size; /* Size of access; unchecked if @addr is NULL. */
+ int type; /* Access type, see KCSAN_ACCESS definitions. */
+ } access[2];
+};
+
+/* Check observed report matches information in @r. */
+__no_kcsan
+static bool report_matches(const struct expect_report *r)
+{
+ const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT;
+ bool ret = false;
+ unsigned long flags;
+ typeof(observed.lines) expect;
+ const char *end;
+ char *cur;
+ int i;
+
+ /* Doubled-checked locking. */
+ if (!report_available())
+ return false;
+
+ /* Generate expected report contents. */
+
+ /* Title */
+ cur = expect[0];
+ end = &expect[0][sizeof(expect[0]) - 1];
+ cur += scnprintf(cur, end - cur, "BUG: KCSAN: %s in ",
+ is_assert ? "assert: race" : "data-race");
+ if (r->access[1].fn) {
+ char tmp[2][64];
+ int cmp;
+
+ /* Expect lexographically sorted function names in title. */
+ scnprintf(tmp[0], sizeof(tmp[0]), "%pS", r->access[0].fn);
+ scnprintf(tmp[1], sizeof(tmp[1]), "%pS", r->access[1].fn);
+ cmp = strcmp(tmp[0], tmp[1]);
+ cur += scnprintf(cur, end - cur, "%ps / %ps",
+ cmp < 0 ? r->access[0].fn : r->access[1].fn,
+ cmp < 0 ? r->access[1].fn : r->access[0].fn);
+ } else {
+ scnprintf(cur, end - cur, "%pS", r->access[0].fn);
+ /* The exact offset won't match, remove it. */
+ cur = strchr(expect[0], '+');
+ if (cur)
+ *cur = '\0';
+ }
+
+ /* Access 1 */
+ cur = expect[1];
+ end = &expect[1][sizeof(expect[1]) - 1];
+ if (!r->access[1].fn)
+ cur += scnprintf(cur, end - cur, "race at unknown origin, with ");
+
+ /* Access 1 & 2 */
+ for (i = 0; i < 2; ++i) {
+ const char *const access_type =
+ (r->access[i].type & KCSAN_ACCESS_ASSERT) ?
+ ((r->access[i].type & KCSAN_ACCESS_WRITE) ?
+ "assert no accesses" :
+ "assert no writes") :
+ ((r->access[i].type & KCSAN_ACCESS_WRITE) ?
+ "write" :
+ "read");
+ const char *const access_type_aux =
+ (r->access[i].type & KCSAN_ACCESS_ATOMIC) ?
+ " (marked)" :
+ ((r->access[i].type & KCSAN_ACCESS_SCOPED) ?
+ " (scoped)" :
+ "");
+
+ if (i == 1) {
+ /* Access 2 */
+ cur = expect[2];
+ end = &expect[2][sizeof(expect[2]) - 1];
+
+ if (!r->access[1].fn) {
+ /* Dummy string if no second access is available. */
+ strcpy(cur, "<none>");
+ break;
+ }
+ }
+
+ cur += scnprintf(cur, end - cur, "%s%s to ", access_type,
+ access_type_aux);
+
+ if (r->access[i].addr) /* Address is optional. */
+ cur += scnprintf(cur, end - cur, "0x%px of %zu bytes",
+ r->access[i].addr, r->access[i].size);
+ }
+
+ spin_lock_irqsave(&observed.lock, flags);
+ if (!report_available())
+ goto out; /* A new report is being captured. */
+
+ /* Finally match expected output to what we actually observed. */
+ ret = strstr(observed.lines[0], expect[0]) &&
+ /* Access info may appear in any order. */
+ ((strstr(observed.lines[1], expect[1]) &&
+ strstr(observed.lines[2], expect[2])) ||
+ (strstr(observed.lines[1], expect[2]) &&
+ strstr(observed.lines[2], expect[1])));
+out:
+ spin_unlock_irqrestore(&observed.lock, flags);
+ return ret;
+}
+
+/* ===== Test kernels ===== */
+
+static long test_sink;
+static long test_var;
+/* @test_array should be large enough to fall into multiple watchpoint slots. */
+static long test_array[3 * PAGE_SIZE / sizeof(long)];
+static struct {
+ long val[8];
+} test_struct;
+static DEFINE_SEQLOCK(test_seqlock);
+
+/*
+ * Helper to avoid compiler optimizing out reads, and to generate source values
+ * for writes.
+ */
+__no_kcsan
+static noinline void sink_value(long v) { WRITE_ONCE(test_sink, v); }
+
+static noinline void test_kernel_read(void) { sink_value(test_var); }
+
+static noinline void test_kernel_write(void)
+{
+ test_var = READ_ONCE_NOCHECK(test_sink) + 1;
+}
+
+static noinline void test_kernel_write_nochange(void) { test_var = 42; }
+
+/* Suffixed by value-change exception filter. */
+static noinline void test_kernel_write_nochange_rcu(void) { test_var = 42; }
+
+static noinline void test_kernel_read_atomic(void)
+{
+ sink_value(READ_ONCE(test_var));
+}
+
+static noinline void test_kernel_write_atomic(void)
+{
+ WRITE_ONCE(test_var, READ_ONCE_NOCHECK(test_sink) + 1);
+}
+
+__no_kcsan
+static noinline void test_kernel_write_uninstrumented(void) { test_var++; }
+
+static noinline void test_kernel_data_race(void) { data_race(test_var++); }
+
+static noinline void test_kernel_assert_writer(void)
+{
+ ASSERT_EXCLUSIVE_WRITER(test_var);
+}
+
+static noinline void test_kernel_assert_access(void)
+{
+ ASSERT_EXCLUSIVE_ACCESS(test_var);
+}
+
+#define TEST_CHANGE_BITS 0xff00ff00
+
+static noinline void test_kernel_change_bits(void)
+{
+ if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) {
+ /*
+ * Avoid race of unknown origin for this test, just pretend they
+ * are atomic.
+ */
+ kcsan_nestable_atomic_begin();
+ test_var ^= TEST_CHANGE_BITS;
+ kcsan_nestable_atomic_end();
+ } else
+ WRITE_ONCE(test_var, READ_ONCE(test_var) ^ TEST_CHANGE_BITS);
+}
+
+static noinline void test_kernel_assert_bits_change(void)
+{
+ ASSERT_EXCLUSIVE_BITS(test_var, TEST_CHANGE_BITS);
+}
+
+static noinline void test_kernel_assert_bits_nochange(void)
+{
+ ASSERT_EXCLUSIVE_BITS(test_var, ~TEST_CHANGE_BITS);
+}
+
+/* To check that scoped assertions do trigger anywhere in scope. */
+static noinline void test_enter_scope(void)
+{
+ int x = 0;
+
+ /* Unrelated accesses to scoped assert. */
+ READ_ONCE(test_sink);
+ kcsan_check_read(&x, sizeof(x));
+}
+
+static noinline void test_kernel_assert_writer_scoped(void)
+{
+ ASSERT_EXCLUSIVE_WRITER_SCOPED(test_var);
+ test_enter_scope();
+}
+
+static noinline void test_kernel_assert_access_scoped(void)
+{
+ ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_var);
+ test_enter_scope();
+}
+
+static noinline void test_kernel_rmw_array(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_array); ++i)
+ test_array[i]++;
+}
+
+static noinline void test_kernel_write_struct(void)
+{
+ kcsan_check_write(&test_struct, sizeof(test_struct));
+ kcsan_disable_current();
+ test_struct.val[3]++; /* induce value change */
+ kcsan_enable_current();
+}
+
+static noinline void test_kernel_write_struct_part(void)
+{
+ test_struct.val[3] = 42;
+}
+
+static noinline void test_kernel_read_struct_zero_size(void)
+{
+ kcsan_check_read(&test_struct.val[3], 0);
+}
+
+static noinline void test_kernel_jiffies_reader(void)
+{
+ sink_value((long)jiffies);
+}
+
+static noinline void test_kernel_seqlock_reader(void)
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&test_seqlock);
+ sink_value(test_var);
+ } while (read_seqretry(&test_seqlock, seq));
+}
+
+static noinline void test_kernel_seqlock_writer(void)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&test_seqlock, flags);
+ test_var++;
+ write_sequnlock_irqrestore(&test_seqlock, flags);
+}
+
+/* ===== Test cases ===== */
+
+/* Simple test with normal data race. */
+__no_kcsan
+static void test_basic(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ },
+ };
+ static const struct expect_report never = {
+ .access = {
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ },
+ };
+ bool match_expect = false;
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_write, test_kernel_read);
+ do {
+ match_expect |= report_matches(&expect);
+ match_never = report_matches(&never);
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/*
+ * Stress KCSAN with lots of concurrent races on different addresses until
+ * timeout.
+ */
+__no_kcsan
+static void test_concurrent_races(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ /* NULL will match any address. */
+ { test_kernel_rmw_array, NULL, 0, KCSAN_ACCESS_WRITE },
+ { test_kernel_rmw_array, NULL, 0, 0 },
+ },
+ };
+ static const struct expect_report never = {
+ .access = {
+ { test_kernel_rmw_array, NULL, 0, 0 },
+ { test_kernel_rmw_array, NULL, 0, 0 },
+ },
+ };
+ bool match_expect = false;
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_rmw_array, test_kernel_rmw_array);
+ do {
+ match_expect |= report_matches(&expect);
+ match_never |= report_matches(&never);
+ } while (!end_test_checks(false));
+ KUNIT_EXPECT_TRUE(test, match_expect); /* Sanity check matches exist. */
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test the KCSAN_REPORT_VALUE_CHANGE_ONLY option. */
+__no_kcsan
+static void test_novalue_change(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_write_nochange, test_kernel_read);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY))
+ KUNIT_EXPECT_FALSE(test, match_expect);
+ else
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/*
+ * Test that the rules where the KCSAN_REPORT_VALUE_CHANGE_ONLY option should
+ * never apply work.
+ */
+__no_kcsan
+static void test_novalue_change_exception(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_write_nochange_rcu, test_kernel_read);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/* Test that data races of unknown origin are reported. */
+__no_kcsan
+static void test_unknown_origin(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ { NULL },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_write_uninstrumented, test_kernel_read);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN))
+ KUNIT_EXPECT_TRUE(test, match_expect);
+ else
+ KUNIT_EXPECT_FALSE(test, match_expect);
+}
+
+/* Test KCSAN_ASSUME_PLAIN_WRITES_ATOMIC if it is selected. */
+__no_kcsan
+static void test_write_write_assume_atomic(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_write, test_kernel_write);
+ do {
+ sink_value(READ_ONCE(test_var)); /* induce value-change */
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC))
+ KUNIT_EXPECT_FALSE(test, match_expect);
+ else
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/*
+ * Test that data races with writes larger than word-size are always reported,
+ * even if KCSAN_ASSUME_PLAIN_WRITES_ATOMIC is selected.
+ */
+__no_kcsan
+static void test_write_write_struct(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+ { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_write_struct, test_kernel_write_struct);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/*
+ * Test that data races where only one write is larger than word-size are always
+ * reported, even if KCSAN_ASSUME_PLAIN_WRITES_ATOMIC is selected.
+ */
+__no_kcsan
+static void test_write_write_struct_part(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+ { test_kernel_write_struct_part, &test_struct.val[3], sizeof(test_struct.val[3]), KCSAN_ACCESS_WRITE },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_write_struct, test_kernel_write_struct_part);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/* Test that races with atomic accesses never result in reports. */
+__no_kcsan
+static void test_read_atomic_write_atomic(struct kunit *test)
+{
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_read_atomic, test_kernel_write_atomic);
+ do {
+ match_never = report_available();
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test that a race with an atomic and plain access result in reports. */
+__no_kcsan
+static void test_read_plain_atomic_write(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ { test_kernel_write_atomic, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC },
+ },
+ };
+ bool match_expect = false;
+
+ if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))
+ return;
+
+ begin_test_checks(test_kernel_read, test_kernel_write_atomic);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/* Zero-sized accesses should never cause data race reports. */
+__no_kcsan
+static void test_zero_size_access(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+ { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+ },
+ };
+ const struct expect_report never = {
+ .access = {
+ { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+ { test_kernel_read_struct_zero_size, &test_struct.val[3], 0, 0 },
+ },
+ };
+ bool match_expect = false;
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_write_struct, test_kernel_read_struct_zero_size);
+ do {
+ match_expect |= report_matches(&expect);
+ match_never = report_matches(&never);
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_TRUE(test, match_expect); /* Sanity check. */
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test the data_race() macro. */
+__no_kcsan
+static void test_data_race(struct kunit *test)
+{
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_data_race, test_kernel_data_race);
+ do {
+ match_never = report_available();
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+__no_kcsan
+static void test_assert_exclusive_writer(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+ { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_assert_writer, test_kernel_write_nochange);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+__no_kcsan
+static void test_assert_exclusive_access(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_assert_access, test_kernel_read);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+__no_kcsan
+static void test_assert_exclusive_access_writer(struct kunit *test)
+{
+ const struct expect_report expect_access_writer = {
+ .access = {
+ { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+ { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+ },
+ };
+ const struct expect_report expect_access_access = {
+ .access = {
+ { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+ { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+ },
+ };
+ const struct expect_report never = {
+ .access = {
+ { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+ { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+ },
+ };
+ bool match_expect_access_writer = false;
+ bool match_expect_access_access = false;
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_assert_access, test_kernel_assert_writer);
+ do {
+ match_expect_access_writer |= report_matches(&expect_access_writer);
+ match_expect_access_access |= report_matches(&expect_access_access);
+ match_never |= report_matches(&never);
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_TRUE(test, match_expect_access_writer);
+ KUNIT_EXPECT_TRUE(test, match_expect_access_access);
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+__no_kcsan
+static void test_assert_exclusive_bits_change(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .access = {
+ { test_kernel_assert_bits_change, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+ { test_kernel_change_bits, &test_var, sizeof(test_var),
+ KCSAN_ACCESS_WRITE | (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) ? 0 : KCSAN_ACCESS_ATOMIC) },
+ },
+ };
+ bool match_expect = false;
+
+ begin_test_checks(test_kernel_assert_bits_change, test_kernel_change_bits);
+ do {
+ match_expect = report_matches(&expect);
+ } while (!end_test_checks(match_expect));
+ KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+__no_kcsan
+static void test_assert_exclusive_bits_nochange(struct kunit *test)
+{
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_assert_bits_nochange, test_kernel_change_bits);
+ do {
+ match_never = report_available();
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+__no_kcsan
+static void test_assert_exclusive_writer_scoped(struct kunit *test)
+{
+ const struct expect_report expect_start = {
+ .access = {
+ { test_kernel_assert_writer_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED },
+ { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ },
+ };
+ const struct expect_report expect_anywhere = {
+ .access = {
+ { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED },
+ { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+ },
+ };
+ bool match_expect_start = false;
+ bool match_expect_anywhere = false;
+
+ begin_test_checks(test_kernel_assert_writer_scoped, test_kernel_write_nochange);
+ do {
+ match_expect_start |= report_matches(&expect_start);
+ match_expect_anywhere |= report_matches(&expect_anywhere);
+ } while (!end_test_checks(match_expect_start && match_expect_anywhere));
+ KUNIT_EXPECT_TRUE(test, match_expect_start);
+ KUNIT_EXPECT_TRUE(test, match_expect_anywhere);
+}
+
+__no_kcsan
+static void test_assert_exclusive_access_scoped(struct kunit *test)
+{
+ const struct expect_report expect_start1 = {
+ .access = {
+ { test_kernel_assert_access_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED },
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ },
+ };
+ const struct expect_report expect_start2 = {
+ .access = { expect_start1.access[0], expect_start1.access[0] },
+ };
+ const struct expect_report expect_inscope = {
+ .access = {
+ { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED },
+ { test_kernel_read, &test_var, sizeof(test_var), 0 },
+ },
+ };
+ bool match_expect_start = false;
+ bool match_expect_inscope = false;
+
+ begin_test_checks(test_kernel_assert_access_scoped, test_kernel_read);
+ end_time += msecs_to_jiffies(1000); /* This test requires a bit more time. */
+ do {
+ match_expect_start |= report_matches(&expect_start1) || report_matches(&expect_start2);
+ match_expect_inscope |= report_matches(&expect_inscope);
+ } while (!end_test_checks(match_expect_start && match_expect_inscope));
+ KUNIT_EXPECT_TRUE(test, match_expect_start);
+ KUNIT_EXPECT_TRUE(test, match_expect_inscope);
+}
+
+/*
+ * jiffies is special (declared to be volatile) and its accesses are typically
+ * not marked; this test ensures that the compiler nor KCSAN gets confused about
+ * jiffies's declaration on different architectures.
+ */
+__no_kcsan
+static void test_jiffies_noreport(struct kunit *test)
+{
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_jiffies_reader, test_kernel_jiffies_reader);
+ do {
+ match_never = report_available();
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test that racing accesses in seqlock critical sections are not reported. */
+__no_kcsan
+static void test_seqlock_noreport(struct kunit *test)
+{
+ bool match_never = false;
+
+ begin_test_checks(test_kernel_seqlock_reader, test_kernel_seqlock_writer);
+ do {
+ match_never = report_available();
+ } while (!end_test_checks(match_never));
+ KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/*
+ * Each test case is run with different numbers of threads. Until KUnit supports
+ * passing arguments for each test case, we encode #threads in the test case
+ * name (read by get_num_threads()). [The '-' was chosen as a stylistic
+ * preference to separate test name and #threads.]
+ *
+ * The thread counts are chosen to cover potentially interesting boundaries and
+ * corner cases (range 2-5), and then stress the system with larger counts.
+ */
+#define KCSAN_KUNIT_CASE(test_name) \
+ { .run_case = test_name, .name = #test_name "-02" }, \
+ { .run_case = test_name, .name = #test_name "-03" }, \
+ { .run_case = test_name, .name = #test_name "-04" }, \
+ { .run_case = test_name, .name = #test_name "-05" }, \
+ { .run_case = test_name, .name = #test_name "-08" }, \
+ { .run_case = test_name, .name = #test_name "-16" }
+
+static struct kunit_case kcsan_test_cases[] = {
+ KCSAN_KUNIT_CASE(test_basic),
+ KCSAN_KUNIT_CASE(test_concurrent_races),
+ KCSAN_KUNIT_CASE(test_novalue_change),
+ KCSAN_KUNIT_CASE(test_novalue_change_exception),
+ KCSAN_KUNIT_CASE(test_unknown_origin),
+ KCSAN_KUNIT_CASE(test_write_write_assume_atomic),
+ KCSAN_KUNIT_CASE(test_write_write_struct),
+ KCSAN_KUNIT_CASE(test_write_write_struct_part),
+ KCSAN_KUNIT_CASE(test_read_atomic_write_atomic),
+ KCSAN_KUNIT_CASE(test_read_plain_atomic_write),
+ KCSAN_KUNIT_CASE(test_zero_size_access),
+ KCSAN_KUNIT_CASE(test_data_race),
+ KCSAN_KUNIT_CASE(test_assert_exclusive_writer),
+ KCSAN_KUNIT_CASE(test_assert_exclusive_access),
+ KCSAN_KUNIT_CASE(test_assert_exclusive_access_writer),
+ KCSAN_KUNIT_CASE(test_assert_exclusive_bits_change),
+ KCSAN_KUNIT_CASE(test_assert_exclusive_bits_nochange),
+ KCSAN_KUNIT_CASE(test_assert_exclusive_writer_scoped),
+ KCSAN_KUNIT_CASE(test_assert_exclusive_access_scoped),
+ KCSAN_KUNIT_CASE(test_jiffies_noreport),
+ KCSAN_KUNIT_CASE(test_seqlock_noreport),
+ {},
+};
+
+/* ===== End test cases ===== */
+
+/* Get number of threads encoded in test name. */
+static bool __no_kcsan
+get_num_threads(const char *test, int *nthreads)
+{
+ int len = strlen(test);
+
+ if (WARN_ON(len < 3))
+ return false;
+
+ *nthreads = test[len - 1] - '0';
+ *nthreads += (test[len - 2] - '0') * 10;
+
+ if (WARN_ON(*nthreads < 0))
+ return false;
+
+ return true;
+}
+
+/* Concurrent accesses from interrupts. */
+__no_kcsan
+static void access_thread_timer(struct timer_list *timer)
+{
+ static atomic_t cnt = ATOMIC_INIT(0);
+ unsigned int idx;
+ void (*func)(void);
+
+ idx = (unsigned int)atomic_inc_return(&cnt) % ARRAY_SIZE(access_kernels);
+ /* Acquire potential initialization. */
+ func = smp_load_acquire(&access_kernels[idx]);
+ if (func)
+ func();
+}
+
+/* The main loop for each thread. */
+__no_kcsan
+static int access_thread(void *arg)
+{
+ struct timer_list timer;
+ unsigned int cnt = 0;
+ unsigned int idx;
+ void (*func)(void);
+
+ timer_setup_on_stack(&timer, access_thread_timer, 0);
+ do {
+ might_sleep();
+
+ if (!timer_pending(&timer))
+ mod_timer(&timer, jiffies + 1);
+ else {
+ /* Iterate through all kernels. */
+ idx = cnt++ % ARRAY_SIZE(access_kernels);
+ /* Acquire potential initialization. */
+ func = smp_load_acquire(&access_kernels[idx]);
+ if (func)
+ func();
+ }
+ } while (!torture_must_stop());
+ del_timer_sync(&timer);
+ destroy_timer_on_stack(&timer);
+
+ torture_kthread_stopping("access_thread");
+ return 0;
+}
+
+__no_kcsan
+static int test_init(struct kunit *test)
+{
+ unsigned long flags;
+ int nthreads;
+ int i;
+
+ spin_lock_irqsave(&observed.lock, flags);
+ for (i = 0; i < ARRAY_SIZE(observed.lines); ++i)
+ observed.lines[i][0] = '\0';
+ observed.nlines = 0;
+ spin_unlock_irqrestore(&observed.lock, flags);
+
+ if (!torture_init_begin((char *)test->name, 1))
+ return -EBUSY;
+
+ if (!get_num_threads(test->name, &nthreads))
+ goto err;
+
+ if (WARN_ON(threads))
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(access_kernels); ++i) {
+ if (WARN_ON(access_kernels[i]))
+ goto err;
+ }
+
+ if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
+ /*
+ * Without any preemption, keep 2 CPUs free for other tasks, one
+ * of which is the main test case function checking for
+ * completion or failure.
+ */
+ const int min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
+ const int min_required_cpus = 2 + min_unused_cpus;
+
+ if (num_online_cpus() < min_required_cpus) {
+ pr_err("%s: too few online CPUs (%u < %d) for test",
+ test->name, num_online_cpus(), min_required_cpus);
+ goto err;
+ } else if (nthreads > num_online_cpus() - min_unused_cpus) {
+ nthreads = num_online_cpus() - min_unused_cpus;
+ pr_warn("%s: limiting number of threads to %d\n",
+ test->name, nthreads);
+ }
+ }
+
+ if (nthreads) {
+ threads = kcalloc(nthreads + 1, sizeof(struct task_struct *),
+ GFP_KERNEL);
+ if (WARN_ON(!threads))
+ goto err;
+
+ threads[nthreads] = NULL;
+ for (i = 0; i < nthreads; ++i) {
+ if (torture_create_kthread(access_thread, NULL,
+ threads[i]))
+ goto err;
+ }
+ }
+
+ torture_init_end();
+
+ return 0;
+
+err:
+ kfree(threads);
+ threads = NULL;
+ torture_init_end();
+ return -EINVAL;
+}
+
+__no_kcsan
+static void test_exit(struct kunit *test)
+{
+ struct task_struct **stop_thread;
+ int i;
+
+ if (torture_cleanup_begin())
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(access_kernels); ++i)
+ WRITE_ONCE(access_kernels[i], NULL);
+
+ if (threads) {
+ for (stop_thread = threads; *stop_thread; stop_thread++)
+ torture_stop_kthread(reader_thread, *stop_thread);
+
+ kfree(threads);
+ threads = NULL;
+ }
+
+ torture_cleanup_end();
+}
+
+static struct kunit_suite kcsan_test_suite = {
+ .name = "kcsan-test",
+ .test_cases = kcsan_test_cases,
+ .init = test_init,
+ .exit = test_exit,
+};
+static struct kunit_suite *kcsan_test_suites[] = { &kcsan_test_suite, NULL };
+
+__no_kcsan
+static void register_tracepoints(struct tracepoint *tp, void *ignore)
+{
+ check_trace_callback_type_console(probe_console);
+ if (!strcmp(tp->name, "console"))
+ WARN_ON(tracepoint_probe_register(tp, probe_console, NULL));
+}
+
+__no_kcsan
+static void unregister_tracepoints(struct tracepoint *tp, void *ignore)
+{
+ if (!strcmp(tp->name, "console"))
+ tracepoint_probe_unregister(tp, probe_console, NULL);
+}
+
+/*
+ * We only want to do tracepoints setup and teardown once, therefore we have to
+ * customize the init and exit functions and cannot rely on kunit_test_suite().
+ */
+static int __init kcsan_test_init(void)
+{
+ /*
+ * Because we want to be able to build the test as a module, we need to
+ * iterate through all known tracepoints, since the static registration
+ * won't work here.
+ */
+ for_each_kernel_tracepoint(register_tracepoints, NULL);
+ return __kunit_test_suites_init(kcsan_test_suites);
+}
+
+static void kcsan_test_exit(void)
+{
+ __kunit_test_suites_exit(kcsan_test_suites);
+ for_each_kernel_tracepoint(unregister_tracepoints, NULL);
+ tracepoint_synchronize_unregister();
+}
+
+late_initcall(kcsan_test_init);
+module_exit(kcsan_test_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Marco Elver <elver@google.com>");
diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h
index 763d6d08d94b..29480010dc30 100644
--- a/kernel/kcsan/kcsan.h
+++ b/kernel/kcsan/kcsan.h
@@ -9,6 +9,7 @@
#define _KERNEL_KCSAN_KCSAN_H
#include <linux/kcsan.h>
+#include <linux/sched.h>
/* The number of adjacent watchpoints to check. */
#define KCSAN_CHECK_ADJACENT 1
@@ -23,6 +24,12 @@ extern unsigned int kcsan_udelay_interrupt;
extern bool kcsan_enabled;
/*
+ * Save/restore IRQ flags state trace dirtied by KCSAN.
+ */
+void kcsan_save_irqtrace(struct task_struct *task);
+void kcsan_restore_irqtrace(struct task_struct *task);
+
+/*
* Initialize debugfs file.
*/
void kcsan_debugfs_init(void);
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..9d07e175de0f 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -308,6 +308,9 @@ static void print_verbose_info(struct task_struct *task)
if (!task)
return;
+ /* Restore IRQ state trace for printing. */
+ kcsan_restore_irqtrace(task);
+
pr_err("\n");
debug_show_held_locks(task);
print_irqtrace_events(task);
@@ -606,10 +609,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
goto out;
/*
- * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
- * we do not turn off lockdep here; this could happen due to recursion
- * into lockdep via KCSAN if we detect a race in utilities used by
- * lockdep.
+ * Because we may generate reports when we're in scheduler code, the use
+ * of printk() could deadlock. Until such time that all printing code
+ * called in print_report() is scheduler-safe, accept the risk, and just
+ * get our message out. As such, also disable lockdep to hide the
+ * warning, and avoid disabling lockdep for the rest of the kernel.
*/
lockdep_off();
diff --git a/kernel/kcsan/test.c b/kernel/kcsan/selftest.c
index d26a052d3383..d26a052d3383 100644
--- a/kernel/kcsan/test.c
+++ b/kernel/kcsan/selftest.c
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0a7549d159ed..f361d75a75ac 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -395,7 +395,7 @@ void lockdep_init_task(struct task_struct *task)
static __always_inline void lockdep_recursion_finish(void)
{
- if (WARN_ON_ONCE(--current->lockdep_recursion))
+ if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK))
current->lockdep_recursion = 0;
}
@@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr,
pr_warn("-----------------------------------------------------\n");
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
- curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
+ lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
- curr->hardirqs_enabled,
+ lockdep_hardirqs_enabled(),
curr->softirqs_enabled);
print_lock(next);
@@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
curr->comm, task_pid_nr(curr),
- lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+ lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
- lockdep_hardirqs_enabled(curr),
+ lockdep_hardirqs_enabled(),
lockdep_softirqs_enabled(curr));
print_lock(this);
@@ -3484,19 +3484,21 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
void print_irqtrace_events(struct task_struct *curr)
{
- printk("irq event stamp: %u\n", curr->irq_events);
+ const struct irqtrace_events *trace = &curr->irqtrace;
+
+ printk("irq event stamp: %u\n", trace->irq_events);
printk("hardirqs last enabled at (%u): [<%px>] %pS\n",
- curr->hardirq_enable_event, (void *)curr->hardirq_enable_ip,
- (void *)curr->hardirq_enable_ip);
+ trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip,
+ (void *)trace->hardirq_enable_ip);
printk("hardirqs last disabled at (%u): [<%px>] %pS\n",
- curr->hardirq_disable_event, (void *)curr->hardirq_disable_ip,
- (void *)curr->hardirq_disable_ip);
+ trace->hardirq_disable_event, (void *)trace->hardirq_disable_ip,
+ (void *)trace->hardirq_disable_ip);
printk("softirqs last enabled at (%u): [<%px>] %pS\n",
- curr->softirq_enable_event, (void *)curr->softirq_enable_ip,
- (void *)curr->softirq_enable_ip);
+ trace->softirq_enable_event, (void *)trace->softirq_enable_ip,
+ (void *)trace->softirq_enable_ip);
printk("softirqs last disabled at (%u): [<%px>] %pS\n",
- curr->softirq_disable_event, (void *)curr->softirq_disable_ip,
- (void *)curr->softirq_disable_ip);
+ trace->softirq_disable_event, (void *)trace->softirq_disable_ip,
+ (void *)trace->softirq_disable_ip);
}
static int HARDIRQ_verbose(struct lock_class *class)
@@ -3646,10 +3648,19 @@ static void __trace_hardirqs_on_caller(void)
*/
void lockdep_hardirqs_on_prepare(unsigned long ip)
{
- if (unlikely(!debug_locks || current->lockdep_recursion))
+ if (unlikely(!debug_locks))
+ return;
+
+ /*
+ * NMIs do not (and cannot) track lock dependencies, nothing to do.
+ */
+ if (unlikely(in_nmi()))
+ return;
+
+ if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
- if (unlikely(current->hardirqs_enabled)) {
+ if (unlikely(lockdep_hardirqs_enabled())) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3677,7 +3688,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
* Can't allow enabling interrupts while in an interrupt handler,
* that's general bad form and such. Recursion, limited stack etc..
*/
- if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context()))
return;
current->hardirq_chain_key = current->curr_chain_key;
@@ -3690,12 +3701,35 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_on_prepare);
void noinstr lockdep_hardirqs_on(unsigned long ip)
{
- struct task_struct *curr = current;
+ struct irqtrace_events *trace = &current->irqtrace;
+
+ if (unlikely(!debug_locks))
+ return;
+
+ /*
+ * NMIs can happen in the middle of local_irq_{en,dis}able() where the
+ * tracking state and hardware state are out of sync.
+ *
+ * NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from,
+ * and not rely on hardware state like normal interrupts.
+ */
+ if (unlikely(in_nmi())) {
+ if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
+ return;
+
+ /*
+ * Skip:
+ * - recursion check, because NMI can hit lockdep;
+ * - hardware state check, because above;
+ * - chain_key check, see lockdep_hardirqs_on_prepare().
+ */
+ goto skip_checks;
+ }
- if (unlikely(!debug_locks || curr->lockdep_recursion))
+ if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
- if (curr->hardirqs_enabled) {
+ if (lockdep_hardirqs_enabled()) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3720,10 +3754,11 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
current->curr_chain_key);
+skip_checks:
/* we'll do an OFF -> ON transition: */
- curr->hardirqs_enabled = 1;
- curr->hardirq_enable_ip = ip;
- curr->hardirq_enable_event = ++curr->irq_events;
+ this_cpu_write(hardirqs_enabled, 1);
+ trace->hardirq_enable_ip = ip;
+ trace->hardirq_enable_event = ++trace->irq_events;
debug_atomic_inc(hardirqs_on_events);
}
EXPORT_SYMBOL_GPL(lockdep_hardirqs_on);
@@ -3733,9 +3768,18 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_on);
*/
void noinstr lockdep_hardirqs_off(unsigned long ip)
{
- struct task_struct *curr = current;
+ if (unlikely(!debug_locks))
+ return;
- if (unlikely(!debug_locks || curr->lockdep_recursion))
+ /*
+ * Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep;
+ * they will restore the software state. This ensures the software
+ * state is consistent inside NMIs as well.
+ */
+ if (in_nmi()) {
+ if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
+ return;
+ } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)
return;
/*
@@ -3745,13 +3789,15 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
- if (curr->hardirqs_enabled) {
+ if (lockdep_hardirqs_enabled()) {
+ struct irqtrace_events *trace = &current->irqtrace;
+
/*
* We have done an ON -> OFF transition:
*/
- curr->hardirqs_enabled = 0;
- curr->hardirq_disable_ip = ip;
- curr->hardirq_disable_event = ++curr->irq_events;
+ this_cpu_write(hardirqs_enabled, 0);
+ trace->hardirq_disable_ip = ip;
+ trace->hardirq_disable_event = ++trace->irq_events;
debug_atomic_inc(hardirqs_off_events);
} else {
debug_atomic_inc(redundant_hardirqs_off);
@@ -3764,7 +3810,7 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_off);
*/
void lockdep_softirqs_on(unsigned long ip)
{
- struct task_struct *curr = current;
+ struct irqtrace_events *trace = &current->irqtrace;
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -3776,7 +3822,7 @@ void lockdep_softirqs_on(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
- if (curr->softirqs_enabled) {
+ if (current->softirqs_enabled) {
debug_atomic_inc(redundant_softirqs_on);
return;
}
@@ -3785,17 +3831,17 @@ void lockdep_softirqs_on(unsigned long ip)
/*
* We'll do an OFF -> ON transition:
*/
- curr->softirqs_enabled = 1;
- curr->softirq_enable_ip = ip;
- curr->softirq_enable_event = ++curr->irq_events;
+ current->softirqs_enabled = 1;
+ trace->softirq_enable_ip = ip;
+ trace->softirq_enable_event = ++trace->irq_events;
debug_atomic_inc(softirqs_on_events);
/*
* We are going to turn softirqs on, so set the
* usage bit for all held locks, if hardirqs are
* enabled too:
*/
- if (curr->hardirqs_enabled)
- mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
+ if (lockdep_hardirqs_enabled())
+ mark_held_locks(current, LOCK_ENABLED_SOFTIRQ);
lockdep_recursion_finish();
}
@@ -3804,8 +3850,6 @@ void lockdep_softirqs_on(unsigned long ip)
*/
void lockdep_softirqs_off(unsigned long ip)
{
- struct task_struct *curr = current;
-
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -3815,13 +3859,15 @@ void lockdep_softirqs_off(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
- if (curr->softirqs_enabled) {
+ if (current->softirqs_enabled) {
+ struct irqtrace_events *trace = &current->irqtrace;
+
/*
* We have done an ON -> OFF transition:
*/
- curr->softirqs_enabled = 0;
- curr->softirq_disable_ip = ip;
- curr->softirq_disable_event = ++curr->irq_events;
+ current->softirqs_enabled = 0;
+ trace->softirq_disable_ip = ip;
+ trace->softirq_disable_event = ++trace->irq_events;
debug_atomic_inc(softirqs_off_events);
/*
* Whoops, we wanted softirqs off, so why aren't they?
@@ -3843,7 +3889,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
*/
if (!hlock->trylock) {
if (hlock->read) {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context())
if (!mark_lock(curr, hlock,
LOCK_USED_IN_HARDIRQ_READ))
return 0;
@@ -3852,7 +3898,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
LOCK_USED_IN_SOFTIRQ_READ))
return 0;
} else {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context())
if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
return 0;
if (curr->softirq_context)
@@ -3890,7 +3936,7 @@ lock_used:
static inline unsigned int task_irq_context(struct task_struct *task)
{
- return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context +
+ return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context() +
LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context;
}
@@ -3983,7 +4029,7 @@ static inline short task_wait_context(struct task_struct *curr)
* Set appropriate wait type for the context; for IRQs we have to take
* into account force_irqthread as that is implied by PREEMPT_RT.
*/
- if (curr->hardirq_context) {
+ if (lockdep_hardirq_context()) {
/*
* Check if force_irqthreads will run us threaded.
*/
@@ -4826,11 +4872,11 @@ static void check_flags(unsigned long flags)
return;
if (irqs_disabled_flags(flags)) {
- if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) {
printk("possible reason: unannotated irqs-off.\n");
}
} else {
- if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled())) {
printk("possible reason: unannotated irqs-on.\n");
}
}
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 1f7734949ac8..1de006ed3aa8 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -154,7 +154,11 @@ bool osq_lock(struct optimistic_spin_queue *lock)
*/
for (;;) {
- if (prev->next == node &&
+ /*
+ * cpu_relax() below implies a compiler barrier which would
+ * prevent this comparison being optimized away.
+ */
+ if (data_race(prev->next) == node &&
cmpxchg(&prev->next, node, NULL) == node)
break;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c4201b7f42b1..5e9aaa648a74 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -107,6 +107,12 @@ static bool ksoftirqd_running(unsigned long pending)
* where hardirqs are disabled legitimately:
*/
#ifdef CONFIG_TRACE_IRQFLAGS
+
+DEFINE_PER_CPU(int, hardirqs_enabled);
+DEFINE_PER_CPU(int, hardirq_context);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
+
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
unsigned long flags;
@@ -224,7 +230,7 @@ static inline bool lockdep_softirq_start(void)
{
bool in_hardirq = false;
- if (lockdep_hardirq_context(current)) {
+ if (lockdep_hardirq_context()) {
in_hardirq = true;
lockdep_hardirq_exit();
}