From cda099b37d7165fc73a63961739acf026444cde2 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 19 Feb 2020 11:00:54 -0800 Subject: fork: Annotate a data race in vm_area_dup() struct vm_area_struct could be accessed concurrently as noticed by KCSAN, write to 0xffff9cf8bba08ad8 of 8 bytes by task 14263 on cpu 35: vma_interval_tree_insert+0x101/0x150: rb_insert_augmented_cached at include/linux/rbtree_augmented.h:58 (inlined by) vma_interval_tree_insert at mm/interval_tree.c:23 __vma_link_file+0x6e/0xe0 __vma_link_file at mm/mmap.c:629 vma_link+0xa2/0x120 mmap_region+0x753/0xb90 do_mmap+0x45c/0x710 vm_mmap_pgoff+0xc0/0x130 ksys_mmap_pgoff+0x1d1/0x300 __x64_sys_mmap+0x33/0x40 do_syscall_64+0x91/0xc44 entry_SYSCALL_64_after_hwframe+0x49/0xbe read to 0xffff9cf8bba08a80 of 200 bytes by task 14262 on cpu 122: vm_area_dup+0x6a/0xe0 vm_area_dup at kernel/fork.c:362 __split_vma+0x72/0x2a0 __split_vma at mm/mmap.c:2661 split_vma+0x5a/0x80 mprotect_fixup+0x368/0x3f0 do_mprotect_pkey+0x263/0x420 __x64_sys_mprotect+0x51/0x70 do_syscall_64+0x91/0xc44 entry_SYSCALL_64_after_hwframe+0x49/0xbe vm_area_dup() blindly copies all fields of original VMA to the new one. This includes coping vm_area_struct::shared.rb which is normally protected by i_mmap_lock. But this is fine because the read value will be overwritten on the following __vma_link_file() under proper protection. Thus, mark it as an intentional data race and insert a few assertions for the fields that should not be modified concurrently. Signed-off-by: Qian Cai Signed-off-by: Paul E. McKenney --- kernel/fork.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 142b23645d82..bba10fbcdce7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -359,7 +359,13 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (new) { - *new = *orig; + ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); + ASSERT_EXCLUSIVE_WRITER(orig->vm_file); + /* + * orig->shared.rb may be modified concurrently, but the clone + * will be reinitialized. + */ + *new = data_race(*orig); INIT_LIST_HEAD(&new->anon_vma_chain); new->vm_next = new->vm_prev = NULL; } -- cgit v1.2.3 From 1fe84fd4a4027a17d511a832f89ab14107650ba4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 5 May 2020 20:28:21 +0200 Subject: kcsan: Add test suite This adds KCSAN test focusing on behaviour of the integrated runtime. Tests various race scenarios, and verifies the reports generated to console. Makes use of KUnit for test organization, and the Torture framework for test thread control. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/Makefile | 3 + kernel/kcsan/kcsan-test.c | 1084 +++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.kcsan | 23 +- 3 files changed, 1109 insertions(+), 1 deletion(-) create mode 100644 kernel/kcsan/kcsan-test.c (limited to 'kernel') diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index d4999b38d1be..14533cf24bc3 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -12,3 +12,6 @@ CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \ obj-y := core.o debugfs.o report.o obj-$(CONFIG_KCSAN_SELFTEST) += test.o + +CFLAGS_kcsan-test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer +obj-$(CONFIG_KCSAN_TEST) += kcsan-test.o diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c new file mode 100644 index 000000000000..a8c11506dd2a --- /dev/null +++ b/kernel/kcsan/kcsan-test.c @@ -0,0 +1,1084 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KCSAN test with various race scenarious to test runtime behaviour. Since the + * interface with which KCSAN's reports are obtained is via the console, this is + * the output we should verify. For each test case checks the presence (or + * absence) of generated reports. Relies on 'console' tracepoint to capture + * reports as they appear in the kernel log. + * + * Makes use of KUnit for test organization, and the Torture framework for test + * thread control. + * + * Copyright (C) 2020, Google LLC. + * Author: Marco Elver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Points to current test-case memory access "kernels". */ +static void (*access_kernels[2])(void); + +static struct task_struct **threads; /* Lists of threads. */ +static unsigned long end_time; /* End time of test. */ + +/* Report as observed from console. */ +static struct { + spinlock_t lock; + int nlines; + char lines[3][512]; +} observed = { + .lock = __SPIN_LOCK_UNLOCKED(observed.lock), +}; + +/* Setup test checking loop. */ +static __no_kcsan_or_inline void +begin_test_checks(void (*func1)(void), void (*func2)(void)) +{ + kcsan_disable_current(); + + /* + * Require at least as long as KCSAN_REPORT_ONCE_IN_MS, to ensure at + * least one race is reported. + */ + end_time = jiffies + msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS + 500); + + /* Signal start; release potential initialization of shared data. */ + smp_store_release(&access_kernels[0], func1); + smp_store_release(&access_kernels[1], func2); +} + +/* End test checking loop. */ +static __no_kcsan_or_inline bool +end_test_checks(bool stop) +{ + if (!stop && time_before(jiffies, end_time)) { + /* Continue checking */ + might_sleep(); + return false; + } + + kcsan_enable_current(); + return true; +} + +/* + * Probe for console output: checks if a race was reported, and obtains observed + * lines of interest. + */ +__no_kcsan +static void probe_console(void *ignore, const char *buf, size_t len) +{ + unsigned long flags; + int nlines; + + /* + * Note that KCSAN reports under a global lock, so we do not risk the + * possibility of having multiple reports interleaved. If that were the + * case, we'd expect tests to fail. + */ + + spin_lock_irqsave(&observed.lock, flags); + nlines = observed.nlines; + + if (strnstr(buf, "BUG: KCSAN: ", len) && strnstr(buf, "test_", len)) { + /* + * KCSAN report and related to the test. + * + * The provided @buf is not NUL-terminated; copy no more than + * @len bytes and let strscpy() add the missing NUL-terminator. + */ + strscpy(observed.lines[0], buf, min(len + 1, sizeof(observed.lines[0]))); + nlines = 1; + } else if ((nlines == 1 || nlines == 2) && strnstr(buf, "bytes by", len)) { + strscpy(observed.lines[nlines++], buf, min(len + 1, sizeof(observed.lines[0]))); + + if (strnstr(buf, "race at unknown origin", len)) { + if (WARN_ON(nlines != 2)) + goto out; + + /* No second line of interest. */ + strcpy(observed.lines[nlines++], ""); + } + } + +out: + WRITE_ONCE(observed.nlines, nlines); /* Publish new nlines. */ + spin_unlock_irqrestore(&observed.lock, flags); +} + +/* Check if a report related to the test exists. */ +__no_kcsan +static bool report_available(void) +{ + return READ_ONCE(observed.nlines) == ARRAY_SIZE(observed.lines); +} + +/* Report information we expect in a report. */ +struct expect_report { + /* Access information of both accesses. */ + struct { + void *fn; /* Function pointer to expected function of top frame. */ + void *addr; /* Address of access; unchecked if NULL. */ + size_t size; /* Size of access; unchecked if @addr is NULL. */ + int type; /* Access type, see KCSAN_ACCESS definitions. */ + } access[2]; +}; + +/* Check observed report matches information in @r. */ +__no_kcsan +static bool report_matches(const struct expect_report *r) +{ + const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; + bool ret = false; + unsigned long flags; + typeof(observed.lines) expect; + const char *end; + char *cur; + int i; + + /* Doubled-checked locking. */ + if (!report_available()) + return false; + + /* Generate expected report contents. */ + + /* Title */ + cur = expect[0]; + end = &expect[0][sizeof(expect[0]) - 1]; + cur += scnprintf(cur, end - cur, "BUG: KCSAN: %s in ", + is_assert ? "assert: race" : "data-race"); + if (r->access[1].fn) { + char tmp[2][64]; + int cmp; + + /* Expect lexographically sorted function names in title. */ + scnprintf(tmp[0], sizeof(tmp[0]), "%pS", r->access[0].fn); + scnprintf(tmp[1], sizeof(tmp[1]), "%pS", r->access[1].fn); + cmp = strcmp(tmp[0], tmp[1]); + cur += scnprintf(cur, end - cur, "%ps / %ps", + cmp < 0 ? r->access[0].fn : r->access[1].fn, + cmp < 0 ? r->access[1].fn : r->access[0].fn); + } else { + scnprintf(cur, end - cur, "%pS", r->access[0].fn); + /* The exact offset won't match, remove it. */ + cur = strchr(expect[0], '+'); + if (cur) + *cur = '\0'; + } + + /* Access 1 */ + cur = expect[1]; + end = &expect[1][sizeof(expect[1]) - 1]; + if (!r->access[1].fn) + cur += scnprintf(cur, end - cur, "race at unknown origin, with "); + + /* Access 1 & 2 */ + for (i = 0; i < 2; ++i) { + const char *const access_type = + (r->access[i].type & KCSAN_ACCESS_ASSERT) ? + ((r->access[i].type & KCSAN_ACCESS_WRITE) ? + "assert no accesses" : + "assert no writes") : + ((r->access[i].type & KCSAN_ACCESS_WRITE) ? + "write" : + "read"); + const char *const access_type_aux = + (r->access[i].type & KCSAN_ACCESS_ATOMIC) ? + " (marked)" : + ((r->access[i].type & KCSAN_ACCESS_SCOPED) ? + " (scoped)" : + ""); + + if (i == 1) { + /* Access 2 */ + cur = expect[2]; + end = &expect[2][sizeof(expect[2]) - 1]; + + if (!r->access[1].fn) { + /* Dummy string if no second access is available. */ + strcpy(cur, ""); + break; + } + } + + cur += scnprintf(cur, end - cur, "%s%s to ", access_type, + access_type_aux); + + if (r->access[i].addr) /* Address is optional. */ + cur += scnprintf(cur, end - cur, "0x%px of %zu bytes", + r->access[i].addr, r->access[i].size); + } + + spin_lock_irqsave(&observed.lock, flags); + if (!report_available()) + goto out; /* A new report is being captured. */ + + /* Finally match expected output to what we actually observed. */ + ret = strstr(observed.lines[0], expect[0]) && + /* Access info may appear in any order. */ + ((strstr(observed.lines[1], expect[1]) && + strstr(observed.lines[2], expect[2])) || + (strstr(observed.lines[1], expect[2]) && + strstr(observed.lines[2], expect[1]))); +out: + spin_unlock_irqrestore(&observed.lock, flags); + return ret; +} + +/* ===== Test kernels ===== */ + +static long test_sink; +static long test_var; +/* @test_array should be large enough to fall into multiple watchpoint slots. */ +static long test_array[3 * PAGE_SIZE / sizeof(long)]; +static struct { + long val[8]; +} test_struct; +static DEFINE_SEQLOCK(test_seqlock); + +/* + * Helper to avoid compiler optimizing out reads, and to generate source values + * for writes. + */ +__no_kcsan +static noinline void sink_value(long v) { WRITE_ONCE(test_sink, v); } + +static noinline void test_kernel_read(void) { sink_value(test_var); } + +static noinline void test_kernel_write(void) +{ + test_var = READ_ONCE_NOCHECK(test_sink) + 1; +} + +static noinline void test_kernel_write_nochange(void) { test_var = 42; } + +/* Suffixed by value-change exception filter. */ +static noinline void test_kernel_write_nochange_rcu(void) { test_var = 42; } + +static noinline void test_kernel_read_atomic(void) +{ + sink_value(READ_ONCE(test_var)); +} + +static noinline void test_kernel_write_atomic(void) +{ + WRITE_ONCE(test_var, READ_ONCE_NOCHECK(test_sink) + 1); +} + +__no_kcsan +static noinline void test_kernel_write_uninstrumented(void) { test_var++; } + +static noinline void test_kernel_data_race(void) { data_race(test_var++); } + +static noinline void test_kernel_assert_writer(void) +{ + ASSERT_EXCLUSIVE_WRITER(test_var); +} + +static noinline void test_kernel_assert_access(void) +{ + ASSERT_EXCLUSIVE_ACCESS(test_var); +} + +#define TEST_CHANGE_BITS 0xff00ff00 + +static noinline void test_kernel_change_bits(void) +{ + if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { + /* + * Avoid race of unknown origin for this test, just pretend they + * are atomic. + */ + kcsan_nestable_atomic_begin(); + test_var ^= TEST_CHANGE_BITS; + kcsan_nestable_atomic_end(); + } else + WRITE_ONCE(test_var, READ_ONCE(test_var) ^ TEST_CHANGE_BITS); +} + +static noinline void test_kernel_assert_bits_change(void) +{ + ASSERT_EXCLUSIVE_BITS(test_var, TEST_CHANGE_BITS); +} + +static noinline void test_kernel_assert_bits_nochange(void) +{ + ASSERT_EXCLUSIVE_BITS(test_var, ~TEST_CHANGE_BITS); +} + +/* To check that scoped assertions do trigger anywhere in scope. */ +static noinline void test_enter_scope(void) +{ + int x = 0; + + /* Unrelated accesses to scoped assert. */ + READ_ONCE(test_sink); + kcsan_check_read(&x, sizeof(x)); +} + +static noinline void test_kernel_assert_writer_scoped(void) +{ + ASSERT_EXCLUSIVE_WRITER_SCOPED(test_var); + test_enter_scope(); +} + +static noinline void test_kernel_assert_access_scoped(void) +{ + ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_var); + test_enter_scope(); +} + +static noinline void test_kernel_rmw_array(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_array); ++i) + test_array[i]++; +} + +static noinline void test_kernel_write_struct(void) +{ + kcsan_check_write(&test_struct, sizeof(test_struct)); + kcsan_disable_current(); + test_struct.val[3]++; /* induce value change */ + kcsan_enable_current(); +} + +static noinline void test_kernel_write_struct_part(void) +{ + test_struct.val[3] = 42; +} + +static noinline void test_kernel_read_struct_zero_size(void) +{ + kcsan_check_read(&test_struct.val[3], 0); +} + +static noinline void test_kernel_seqlock_reader(void) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&test_seqlock); + sink_value(test_var); + } while (read_seqretry(&test_seqlock, seq)); +} + +static noinline void test_kernel_seqlock_writer(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&test_seqlock, flags); + test_var++; + write_sequnlock_irqrestore(&test_seqlock, flags); +} + +/* ===== Test cases ===== */ + +/* Simple test with normal data race. */ +__no_kcsan +static void test_basic(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + }, + }; + static const struct expect_report never = { + .access = { + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + }, + }; + bool match_expect = false; + bool match_never = false; + + begin_test_checks(test_kernel_write, test_kernel_read); + do { + match_expect |= report_matches(&expect); + match_never = report_matches(&never); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_TRUE(test, match_expect); + KUNIT_EXPECT_FALSE(test, match_never); +} + +/* + * Stress KCSAN with lots of concurrent races on different addresses until + * timeout. + */ +__no_kcsan +static void test_concurrent_races(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + /* NULL will match any address. */ + { test_kernel_rmw_array, NULL, 0, KCSAN_ACCESS_WRITE }, + { test_kernel_rmw_array, NULL, 0, 0 }, + }, + }; + static const struct expect_report never = { + .access = { + { test_kernel_rmw_array, NULL, 0, 0 }, + { test_kernel_rmw_array, NULL, 0, 0 }, + }, + }; + bool match_expect = false; + bool match_never = false; + + begin_test_checks(test_kernel_rmw_array, test_kernel_rmw_array); + do { + match_expect |= report_matches(&expect); + match_never |= report_matches(&never); + } while (!end_test_checks(false)); + KUNIT_EXPECT_TRUE(test, match_expect); /* Sanity check matches exist. */ + KUNIT_EXPECT_FALSE(test, match_never); +} + +/* Test the KCSAN_REPORT_VALUE_CHANGE_ONLY option. */ +__no_kcsan +static void test_novalue_change(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_write_nochange, test_kernel_read); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY)) + KUNIT_EXPECT_FALSE(test, match_expect); + else + KUNIT_EXPECT_TRUE(test, match_expect); +} + +/* + * Test that the rules where the KCSAN_REPORT_VALUE_CHANGE_ONLY option should + * never apply work. + */ +__no_kcsan +static void test_novalue_change_exception(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_write_nochange_rcu, test_kernel_read); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + +/* Test that data races of unknown origin are reported. */ +__no_kcsan +static void test_unknown_origin(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + { NULL }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_write_uninstrumented, test_kernel_read); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN)) + KUNIT_EXPECT_TRUE(test, match_expect); + else + KUNIT_EXPECT_FALSE(test, match_expect); +} + +/* Test KCSAN_ASSUME_PLAIN_WRITES_ATOMIC if it is selected. */ +__no_kcsan +static void test_write_write_assume_atomic(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_write, test_kernel_write); + do { + sink_value(READ_ONCE(test_var)); /* induce value-change */ + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC)) + KUNIT_EXPECT_FALSE(test, match_expect); + else + KUNIT_EXPECT_TRUE(test, match_expect); +} + +/* + * Test that data races with writes larger than word-size are always reported, + * even if KCSAN_ASSUME_PLAIN_WRITES_ATOMIC is selected. + */ +__no_kcsan +static void test_write_write_struct(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, + { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_write_struct, test_kernel_write_struct); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + +/* + * Test that data races where only one write is larger than word-size are always + * reported, even if KCSAN_ASSUME_PLAIN_WRITES_ATOMIC is selected. + */ +__no_kcsan +static void test_write_write_struct_part(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, + { test_kernel_write_struct_part, &test_struct.val[3], sizeof(test_struct.val[3]), KCSAN_ACCESS_WRITE }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_write_struct, test_kernel_write_struct_part); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + +/* Test that races with atomic accesses never result in reports. */ +__no_kcsan +static void test_read_atomic_write_atomic(struct kunit *test) +{ + bool match_never = false; + + begin_test_checks(test_kernel_read_atomic, test_kernel_write_atomic); + do { + match_never = report_available(); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_FALSE(test, match_never); +} + +/* Test that a race with an atomic and plain access result in reports. */ +__no_kcsan +static void test_read_plain_atomic_write(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + { test_kernel_write_atomic, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC }, + }, + }; + bool match_expect = false; + + if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) + return; + + begin_test_checks(test_kernel_read, test_kernel_write_atomic); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + +/* Zero-sized accesses should never cause data race reports. */ +__no_kcsan +static void test_zero_size_access(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, + { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, + }, + }; + const struct expect_report never = { + .access = { + { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE }, + { test_kernel_read_struct_zero_size, &test_struct.val[3], 0, 0 }, + }, + }; + bool match_expect = false; + bool match_never = false; + + begin_test_checks(test_kernel_write_struct, test_kernel_read_struct_zero_size); + do { + match_expect |= report_matches(&expect); + match_never = report_matches(&never); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_TRUE(test, match_expect); /* Sanity check. */ + KUNIT_EXPECT_FALSE(test, match_never); +} + +/* Test the data_race() macro. */ +__no_kcsan +static void test_data_race(struct kunit *test) +{ + bool match_never = false; + + begin_test_checks(test_kernel_data_race, test_kernel_data_race); + do { + match_never = report_available(); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_FALSE(test, match_never); +} + +__no_kcsan +static void test_assert_exclusive_writer(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, + { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_assert_writer, test_kernel_write_nochange); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + +__no_kcsan +static void test_assert_exclusive_access(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_assert_access, test_kernel_read); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + +__no_kcsan +static void test_assert_exclusive_access_writer(struct kunit *test) +{ + const struct expect_report expect_access_writer = { + .access = { + { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, + { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, + }, + }; + const struct expect_report expect_access_access = { + .access = { + { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, + { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE }, + }, + }; + const struct expect_report never = { + .access = { + { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, + { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, + }, + }; + bool match_expect_access_writer = false; + bool match_expect_access_access = false; + bool match_never = false; + + begin_test_checks(test_kernel_assert_access, test_kernel_assert_writer); + do { + match_expect_access_writer |= report_matches(&expect_access_writer); + match_expect_access_access |= report_matches(&expect_access_access); + match_never |= report_matches(&never); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_TRUE(test, match_expect_access_writer); + KUNIT_EXPECT_TRUE(test, match_expect_access_access); + KUNIT_EXPECT_FALSE(test, match_never); +} + +__no_kcsan +static void test_assert_exclusive_bits_change(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_assert_bits_change, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT }, + { test_kernel_change_bits, &test_var, sizeof(test_var), + KCSAN_ACCESS_WRITE | (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) ? 0 : KCSAN_ACCESS_ATOMIC) }, + }, + }; + bool match_expect = false; + + begin_test_checks(test_kernel_assert_bits_change, test_kernel_change_bits); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + +__no_kcsan +static void test_assert_exclusive_bits_nochange(struct kunit *test) +{ + bool match_never = false; + + begin_test_checks(test_kernel_assert_bits_nochange, test_kernel_change_bits); + do { + match_never = report_available(); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_FALSE(test, match_never); +} + +__no_kcsan +static void test_assert_exclusive_writer_scoped(struct kunit *test) +{ + const struct expect_report expect_start = { + .access = { + { test_kernel_assert_writer_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED }, + { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + }, + }; + const struct expect_report expect_anywhere = { + .access = { + { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED }, + { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + }, + }; + bool match_expect_start = false; + bool match_expect_anywhere = false; + + begin_test_checks(test_kernel_assert_writer_scoped, test_kernel_write_nochange); + do { + match_expect_start |= report_matches(&expect_start); + match_expect_anywhere |= report_matches(&expect_anywhere); + } while (!end_test_checks(match_expect_start && match_expect_anywhere)); + KUNIT_EXPECT_TRUE(test, match_expect_start); + KUNIT_EXPECT_TRUE(test, match_expect_anywhere); +} + +__no_kcsan +static void test_assert_exclusive_access_scoped(struct kunit *test) +{ + const struct expect_report expect_start1 = { + .access = { + { test_kernel_assert_access_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED }, + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + }, + }; + const struct expect_report expect_start2 = { + .access = { expect_start1.access[0], expect_start1.access[0] }, + }; + const struct expect_report expect_inscope = { + .access = { + { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED }, + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + }, + }; + bool match_expect_start = false; + bool match_expect_inscope = false; + + begin_test_checks(test_kernel_assert_access_scoped, test_kernel_read); + end_time += msecs_to_jiffies(1000); /* This test requires a bit more time. */ + do { + match_expect_start |= report_matches(&expect_start1) || report_matches(&expect_start2); + match_expect_inscope |= report_matches(&expect_inscope); + } while (!end_test_checks(match_expect_start && match_expect_inscope)); + KUNIT_EXPECT_TRUE(test, match_expect_start); + KUNIT_EXPECT_TRUE(test, match_expect_inscope); +} + +/* Test that racing accesses in seqlock critical sections are not reported. */ +__no_kcsan +static void test_seqlock_noreport(struct kunit *test) +{ + bool match_never = false; + + begin_test_checks(test_kernel_seqlock_reader, test_kernel_seqlock_writer); + do { + match_never = report_available(); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_FALSE(test, match_never); +} + +/* + * Each test case is run with different numbers of threads. Until KUnit supports + * passing arguments for each test case, we encode #threads in the test case + * name (read by get_num_threads()). [The '-' was chosen as a stylistic + * preference to separate test name and #threads.] + * + * The thread counts are chosen to cover potentially interesting boundaries and + * corner cases (range 2-5), and then stress the system with larger counts. + */ +#define KCSAN_KUNIT_CASE(test_name) \ + { .run_case = test_name, .name = #test_name "-02" }, \ + { .run_case = test_name, .name = #test_name "-03" }, \ + { .run_case = test_name, .name = #test_name "-04" }, \ + { .run_case = test_name, .name = #test_name "-05" }, \ + { .run_case = test_name, .name = #test_name "-08" }, \ + { .run_case = test_name, .name = #test_name "-16" } + +static struct kunit_case kcsan_test_cases[] = { + KCSAN_KUNIT_CASE(test_basic), + KCSAN_KUNIT_CASE(test_concurrent_races), + KCSAN_KUNIT_CASE(test_novalue_change), + KCSAN_KUNIT_CASE(test_novalue_change_exception), + KCSAN_KUNIT_CASE(test_unknown_origin), + KCSAN_KUNIT_CASE(test_write_write_assume_atomic), + KCSAN_KUNIT_CASE(test_write_write_struct), + KCSAN_KUNIT_CASE(test_write_write_struct_part), + KCSAN_KUNIT_CASE(test_read_atomic_write_atomic), + KCSAN_KUNIT_CASE(test_read_plain_atomic_write), + KCSAN_KUNIT_CASE(test_zero_size_access), + KCSAN_KUNIT_CASE(test_data_race), + KCSAN_KUNIT_CASE(test_assert_exclusive_writer), + KCSAN_KUNIT_CASE(test_assert_exclusive_access), + KCSAN_KUNIT_CASE(test_assert_exclusive_access_writer), + KCSAN_KUNIT_CASE(test_assert_exclusive_bits_change), + KCSAN_KUNIT_CASE(test_assert_exclusive_bits_nochange), + KCSAN_KUNIT_CASE(test_assert_exclusive_writer_scoped), + KCSAN_KUNIT_CASE(test_assert_exclusive_access_scoped), + KCSAN_KUNIT_CASE(test_seqlock_noreport), + {}, +}; + +/* ===== End test cases ===== */ + +/* Get number of threads encoded in test name. */ +static bool __no_kcsan +get_num_threads(const char *test, int *nthreads) +{ + int len = strlen(test); + + if (WARN_ON(len < 3)) + return false; + + *nthreads = test[len - 1] - '0'; + *nthreads += (test[len - 2] - '0') * 10; + + if (WARN_ON(*nthreads < 0)) + return false; + + return true; +} + +/* Concurrent accesses from interrupts. */ +__no_kcsan +static void access_thread_timer(struct timer_list *timer) +{ + static atomic_t cnt = ATOMIC_INIT(0); + unsigned int idx; + void (*func)(void); + + idx = (unsigned int)atomic_inc_return(&cnt) % ARRAY_SIZE(access_kernels); + /* Acquire potential initialization. */ + func = smp_load_acquire(&access_kernels[idx]); + if (func) + func(); +} + +/* The main loop for each thread. */ +__no_kcsan +static int access_thread(void *arg) +{ + struct timer_list timer; + unsigned int cnt = 0; + unsigned int idx; + void (*func)(void); + + timer_setup_on_stack(&timer, access_thread_timer, 0); + do { + might_sleep(); + + if (!timer_pending(&timer)) + mod_timer(&timer, jiffies + 1); + else { + /* Iterate through all kernels. */ + idx = cnt++ % ARRAY_SIZE(access_kernels); + /* Acquire potential initialization. */ + func = smp_load_acquire(&access_kernels[idx]); + if (func) + func(); + } + } while (!torture_must_stop()); + del_timer_sync(&timer); + destroy_timer_on_stack(&timer); + + torture_kthread_stopping("access_thread"); + return 0; +} + +__no_kcsan +static int test_init(struct kunit *test) +{ + unsigned long flags; + int nthreads; + int i; + + spin_lock_irqsave(&observed.lock, flags); + for (i = 0; i < ARRAY_SIZE(observed.lines); ++i) + observed.lines[i][0] = '\0'; + observed.nlines = 0; + spin_unlock_irqrestore(&observed.lock, flags); + + if (!torture_init_begin((char *)test->name, 1)) + return -EBUSY; + + if (!get_num_threads(test->name, &nthreads)) + goto err; + + if (WARN_ON(threads)) + goto err; + + for (i = 0; i < ARRAY_SIZE(access_kernels); ++i) { + if (WARN_ON(access_kernels[i])) + goto err; + } + + if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) { + /* + * Without any preemption, keep 2 CPUs free for other tasks, one + * of which is the main test case function checking for + * completion or failure. + */ + const int min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0; + const int min_required_cpus = 2 + min_unused_cpus; + + if (num_online_cpus() < min_required_cpus) { + pr_err("%s: too few online CPUs (%u < %d) for test", + test->name, num_online_cpus(), min_required_cpus); + goto err; + } else if (nthreads > num_online_cpus() - min_unused_cpus) { + nthreads = num_online_cpus() - min_unused_cpus; + pr_warn("%s: limiting number of threads to %d\n", + test->name, nthreads); + } + } + + if (nthreads) { + threads = kcalloc(nthreads + 1, sizeof(struct task_struct *), + GFP_KERNEL); + if (WARN_ON(!threads)) + goto err; + + threads[nthreads] = NULL; + for (i = 0; i < nthreads; ++i) { + if (torture_create_kthread(access_thread, NULL, + threads[i])) + goto err; + } + } + + torture_init_end(); + + return 0; + +err: + kfree(threads); + threads = NULL; + torture_init_end(); + return -EINVAL; +} + +__no_kcsan +static void test_exit(struct kunit *test) +{ + struct task_struct **stop_thread; + int i; + + if (torture_cleanup_begin()) + return; + + for (i = 0; i < ARRAY_SIZE(access_kernels); ++i) + WRITE_ONCE(access_kernels[i], NULL); + + if (threads) { + for (stop_thread = threads; *stop_thread; stop_thread++) + torture_stop_kthread(reader_thread, *stop_thread); + + kfree(threads); + threads = NULL; + } + + torture_cleanup_end(); +} + +static struct kunit_suite kcsan_test_suite = { + .name = "kcsan-test", + .test_cases = kcsan_test_cases, + .init = test_init, + .exit = test_exit, +}; +static struct kunit_suite *kcsan_test_suites[] = { &kcsan_test_suite, NULL }; + +__no_kcsan +static void register_tracepoints(struct tracepoint *tp, void *ignore) +{ + check_trace_callback_type_console(probe_console); + if (!strcmp(tp->name, "console")) + WARN_ON(tracepoint_probe_register(tp, probe_console, NULL)); +} + +__no_kcsan +static void unregister_tracepoints(struct tracepoint *tp, void *ignore) +{ + if (!strcmp(tp->name, "console")) + tracepoint_probe_unregister(tp, probe_console, NULL); +} + +/* + * We only want to do tracepoints setup and teardown once, therefore we have to + * customize the init and exit functions and cannot rely on kunit_test_suite(). + */ +static int __init kcsan_test_init(void) +{ + /* + * Because we want to be able to build the test as a module, we need to + * iterate through all known tracepoints, since the static registration + * won't work here. + */ + for_each_kernel_tracepoint(register_tracepoints, NULL); + return __kunit_test_suites_init(kcsan_test_suites); +} + +static void kcsan_test_exit(void) +{ + __kunit_test_suites_exit(kcsan_test_suites); + for_each_kernel_tracepoint(unregister_tracepoints, NULL); + tracepoint_synchronize_unregister(); +} + +late_initcall(kcsan_test_init); +module_exit(kcsan_test_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Marco Elver "); diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 5ee88e5119c2..3f3b5bca7a8f 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -59,7 +59,28 @@ config KCSAN_SELFTEST bool "Perform short selftests on boot" default y help - Run KCSAN selftests on boot. On test failure, causes the kernel to panic. + Run KCSAN selftests on boot. On test failure, causes the kernel to + panic. Recommended to be enabled, ensuring critical functionality + works as intended. + +config KCSAN_TEST + tristate "KCSAN test for integrated runtime behaviour" + depends on TRACEPOINTS && KUNIT + select TORTURE_TEST + help + KCSAN test focusing on behaviour of the integrated runtime. Tests + various race scenarios, and verifies the reports generated to + console. Makes use of KUnit for test organization, and the Torture + framework for test thread control. + + Each test case may run at least up to KCSAN_REPORT_ONCE_IN_MS + milliseconds. Test run duration may be optimized by building the + kernel and KCSAN test with KCSAN_REPORT_ONCE_IN_MS set to a lower + than default value. + + Say Y here if you want the test to be built into the kernel and run + during boot; say M if you want the test to build as a module; say N + if you are unsure. config KCSAN_EARLY_ENABLE bool "Early enable during boot" -- cgit v1.2.3 From 33190b675ce2eacbeb4e75168c05b41110b506ec Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 11 Feb 2020 08:54:15 -0500 Subject: locking/osq_lock: Annotate a data race in osq_lock The prev->next pointer can be accessed concurrently as noticed by KCSAN: write (marked) to 0xffff9d3370dbbe40 of 8 bytes by task 3294 on cpu 107: osq_lock+0x25f/0x350 osq_wait_next at kernel/locking/osq_lock.c:79 (inlined by) osq_lock at kernel/locking/osq_lock.c:185 rwsem_optimistic_spin read to 0xffff9d3370dbbe40 of 8 bytes by task 3398 on cpu 100: osq_lock+0x196/0x350 osq_lock at kernel/locking/osq_lock.c:157 rwsem_optimistic_spin Since the write only stores NULL to prev->next and the read tests if prev->next equals to this_cpu_ptr(&osq_node). Even if the value is shattered, the code is still working correctly. Thus, mark it as an intentional data race using the data_race() macro. Signed-off-by: Qian Cai Signed-off-by: Paul E. McKenney --- kernel/locking/osq_lock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 1f7734949ac8..1de006ed3aa8 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -154,7 +154,11 @@ bool osq_lock(struct optimistic_spin_queue *lock) */ for (;;) { - if (prev->next == node && + /* + * cpu_relax() below implies a compiler barrier which would + * prevent this comparison being optimized away. + */ + if (data_race(prev->next) == node && cmpxchg(&prev->next, node, NULL) == node) break; -- cgit v1.2.3 From 2888557f68db334a3839dcc262264a4c436f576b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 2 Jun 2020 16:36:33 +0200 Subject: kcsan: Prefer '__no_kcsan inline' in test Instead of __no_kcsan_or_inline, prefer '__no_kcsan inline' in test -- this is in case we decide to remove __no_kcsan_or_inline. Suggested-by: Peter Zijlstra Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/kcsan-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c index a8c11506dd2a..3af420ad6ee7 100644 --- a/kernel/kcsan/kcsan-test.c +++ b/kernel/kcsan/kcsan-test.c @@ -43,7 +43,7 @@ static struct { }; /* Setup test checking loop. */ -static __no_kcsan_or_inline void +static __no_kcsan inline void begin_test_checks(void (*func1)(void), void (*func2)(void)) { kcsan_disable_current(); @@ -60,7 +60,7 @@ begin_test_checks(void (*func1)(void), void (*func2)(void)) } /* End test checking loop. */ -static __no_kcsan_or_inline bool +static __no_kcsan inline bool end_test_checks(bool stop) { if (!stop && time_before(jiffies, end_time)) { -- cgit v1.2.3 From 9dd979bae4cf76558ff816abe83283308fb1ae8c Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 16 Jun 2020 14:36:22 +0200 Subject: kcsan: Silence -Wmissing-prototypes warning with W=1 The functions here should not be forward declared for explicit use elsewhere in the kernel, as they should only be emitted by the compiler due to sanitizer instrumentation. Add forward declarations a line above their definition to shut up warnings in W=1 builds. Link: https://lkml.kernel.org/r/202006060103.jSCpnV1g%lkp@intel.com Reported-by: kernel test robot Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 15f67949d11e..1866bafda4fd 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -754,6 +754,7 @@ EXPORT_SYMBOL(__kcsan_check_access); */ #define DEFINE_TSAN_READ_WRITE(size) \ + void __tsan_read##size(void *ptr); \ void __tsan_read##size(void *ptr) \ { \ check_access(ptr, size, 0); \ @@ -762,6 +763,7 @@ EXPORT_SYMBOL(__kcsan_check_access); void __tsan_unaligned_read##size(void *ptr) \ __alias(__tsan_read##size); \ EXPORT_SYMBOL(__tsan_unaligned_read##size); \ + void __tsan_write##size(void *ptr); \ void __tsan_write##size(void *ptr) \ { \ check_access(ptr, size, KCSAN_ACCESS_WRITE); \ @@ -777,12 +779,14 @@ DEFINE_TSAN_READ_WRITE(4); DEFINE_TSAN_READ_WRITE(8); DEFINE_TSAN_READ_WRITE(16); +void __tsan_read_range(void *ptr, size_t size); void __tsan_read_range(void *ptr, size_t size) { check_access(ptr, size, 0); } EXPORT_SYMBOL(__tsan_read_range); +void __tsan_write_range(void *ptr, size_t size); void __tsan_write_range(void *ptr, size_t size) { check_access(ptr, size, KCSAN_ACCESS_WRITE); @@ -799,6 +803,7 @@ EXPORT_SYMBOL(__tsan_write_range); * the size-check of compiletime_assert_rwonce_type(). */ #define DEFINE_TSAN_VOLATILE_READ_WRITE(size) \ + void __tsan_volatile_read##size(void *ptr); \ void __tsan_volatile_read##size(void *ptr) \ { \ const bool is_atomic = size <= sizeof(long long) && \ @@ -811,6 +816,7 @@ EXPORT_SYMBOL(__tsan_write_range); void __tsan_unaligned_volatile_read##size(void *ptr) \ __alias(__tsan_volatile_read##size); \ EXPORT_SYMBOL(__tsan_unaligned_volatile_read##size); \ + void __tsan_volatile_write##size(void *ptr); \ void __tsan_volatile_write##size(void *ptr) \ { \ const bool is_atomic = size <= sizeof(long long) && \ @@ -836,14 +842,17 @@ DEFINE_TSAN_VOLATILE_READ_WRITE(16); * The below are not required by KCSAN, but can still be emitted by the * compiler. */ +void __tsan_func_entry(void *call_pc); void __tsan_func_entry(void *call_pc) { } EXPORT_SYMBOL(__tsan_func_entry); +void __tsan_func_exit(void); void __tsan_func_exit(void) { } EXPORT_SYMBOL(__tsan_func_exit); +void __tsan_init(void); void __tsan_init(void) { } -- cgit v1.2.3 From acfa087ccf2d2eff46186477f53e4c3ffbdb033d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 16 Jun 2020 14:36:23 +0200 Subject: kcsan: Rename test.c to selftest.c Rename 'test.c' to 'selftest.c' to better reflect its purpose (Kconfig variable and code inside already match this). This is to avoid confusion with the test suite module in 'kcsan-test.c'. No functional change. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/Makefile | 2 +- kernel/kcsan/selftest.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/kcsan/test.c | 131 ------------------------------------------------ 3 files changed, 132 insertions(+), 132 deletions(-) create mode 100644 kernel/kcsan/selftest.c delete mode 100644 kernel/kcsan/test.c (limited to 'kernel') diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index 14533cf24bc3..092ce58d2e56 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -11,7 +11,7 @@ CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \ $(call cc-option,-fno-stack-protector,) obj-y := core.o debugfs.o report.o -obj-$(CONFIG_KCSAN_SELFTEST) += test.o +obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o CFLAGS_kcsan-test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer obj-$(CONFIG_KCSAN_TEST) += kcsan-test.o diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c new file mode 100644 index 000000000000..d26a052d3383 --- /dev/null +++ b/kernel/kcsan/selftest.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +#include "encoding.h" + +#define ITERS_PER_TEST 2000 + +/* Test requirements. */ +static bool test_requires(void) +{ + /* random should be initialized for the below tests */ + return prandom_u32() + prandom_u32() != 0; +} + +/* + * Test watchpoint encode and decode: check that encoding some access's info, + * and then subsequent decode preserves the access's info. + */ +static bool test_encode_decode(void) +{ + int i; + + for (i = 0; i < ITERS_PER_TEST; ++i) { + size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1; + bool is_write = !!prandom_u32_max(2); + unsigned long addr; + + prandom_bytes(&addr, sizeof(addr)); + if (WARN_ON(!check_encodable(addr, size))) + return false; + + /* Encode and decode */ + { + const long encoded_watchpoint = + encode_watchpoint(addr, size, is_write); + unsigned long verif_masked_addr; + size_t verif_size; + bool verif_is_write; + + /* Check special watchpoints */ + if (WARN_ON(decode_watchpoint( + INVALID_WATCHPOINT, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + if (WARN_ON(decode_watchpoint( + CONSUMED_WATCHPOINT, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + + /* Check decoding watchpoint returns same data */ + if (WARN_ON(!decode_watchpoint( + encoded_watchpoint, &verif_masked_addr, + &verif_size, &verif_is_write))) + return false; + if (WARN_ON(verif_masked_addr != + (addr & WATCHPOINT_ADDR_MASK))) + goto fail; + if (WARN_ON(verif_size != size)) + goto fail; + if (WARN_ON(is_write != verif_is_write)) + goto fail; + + continue; +fail: + pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n", + __func__, is_write ? "write" : "read", size, + addr, encoded_watchpoint, + verif_is_write ? "write" : "read", verif_size, + verif_masked_addr); + return false; + } + } + + return true; +} + +/* Test access matching function. */ +static bool test_matching_access(void) +{ + if (WARN_ON(!matching_access(10, 1, 10, 1))) + return false; + if (WARN_ON(!matching_access(10, 2, 11, 1))) + return false; + if (WARN_ON(!matching_access(10, 1, 9, 2))) + return false; + if (WARN_ON(matching_access(10, 1, 11, 1))) + return false; + if (WARN_ON(matching_access(9, 1, 10, 1))) + return false; + + /* + * An access of size 0 could match another access, as demonstrated here. + * Rather than add more comparisons to 'matching_access()', which would + * end up in the fast-path for *all* checks, check_access() simply + * returns for all accesses of size 0. + */ + if (WARN_ON(!matching_access(8, 8, 12, 0))) + return false; + + return true; +} + +static int __init kcsan_selftest(void) +{ + int passed = 0; + int total = 0; + +#define RUN_TEST(do_test) \ + do { \ + ++total; \ + if (do_test()) \ + ++passed; \ + else \ + pr_err("KCSAN selftest: " #do_test " failed"); \ + } while (0) + + RUN_TEST(test_requires); + RUN_TEST(test_encode_decode); + RUN_TEST(test_matching_access); + + pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total); + if (passed != total) + panic("KCSAN selftests failed"); + return 0; +} +postcore_initcall(kcsan_selftest); diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c deleted file mode 100644 index d26a052d3383..000000000000 --- a/kernel/kcsan/test.c +++ /dev/null @@ -1,131 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include -#include -#include - -#include "encoding.h" - -#define ITERS_PER_TEST 2000 - -/* Test requirements. */ -static bool test_requires(void) -{ - /* random should be initialized for the below tests */ - return prandom_u32() + prandom_u32() != 0; -} - -/* - * Test watchpoint encode and decode: check that encoding some access's info, - * and then subsequent decode preserves the access's info. - */ -static bool test_encode_decode(void) -{ - int i; - - for (i = 0; i < ITERS_PER_TEST; ++i) { - size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1; - bool is_write = !!prandom_u32_max(2); - unsigned long addr; - - prandom_bytes(&addr, sizeof(addr)); - if (WARN_ON(!check_encodable(addr, size))) - return false; - - /* Encode and decode */ - { - const long encoded_watchpoint = - encode_watchpoint(addr, size, is_write); - unsigned long verif_masked_addr; - size_t verif_size; - bool verif_is_write; - - /* Check special watchpoints */ - if (WARN_ON(decode_watchpoint( - INVALID_WATCHPOINT, &verif_masked_addr, - &verif_size, &verif_is_write))) - return false; - if (WARN_ON(decode_watchpoint( - CONSUMED_WATCHPOINT, &verif_masked_addr, - &verif_size, &verif_is_write))) - return false; - - /* Check decoding watchpoint returns same data */ - if (WARN_ON(!decode_watchpoint( - encoded_watchpoint, &verif_masked_addr, - &verif_size, &verif_is_write))) - return false; - if (WARN_ON(verif_masked_addr != - (addr & WATCHPOINT_ADDR_MASK))) - goto fail; - if (WARN_ON(verif_size != size)) - goto fail; - if (WARN_ON(is_write != verif_is_write)) - goto fail; - - continue; -fail: - pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n", - __func__, is_write ? "write" : "read", size, - addr, encoded_watchpoint, - verif_is_write ? "write" : "read", verif_size, - verif_masked_addr); - return false; - } - } - - return true; -} - -/* Test access matching function. */ -static bool test_matching_access(void) -{ - if (WARN_ON(!matching_access(10, 1, 10, 1))) - return false; - if (WARN_ON(!matching_access(10, 2, 11, 1))) - return false; - if (WARN_ON(!matching_access(10, 1, 9, 2))) - return false; - if (WARN_ON(matching_access(10, 1, 11, 1))) - return false; - if (WARN_ON(matching_access(9, 1, 10, 1))) - return false; - - /* - * An access of size 0 could match another access, as demonstrated here. - * Rather than add more comparisons to 'matching_access()', which would - * end up in the fast-path for *all* checks, check_access() simply - * returns for all accesses of size 0. - */ - if (WARN_ON(!matching_access(8, 8, 12, 0))) - return false; - - return true; -} - -static int __init kcsan_selftest(void) -{ - int passed = 0; - int total = 0; - -#define RUN_TEST(do_test) \ - do { \ - ++total; \ - if (do_test()) \ - ++passed; \ - else \ - pr_err("KCSAN selftest: " #do_test " failed"); \ - } while (0) - - RUN_TEST(test_requires); - RUN_TEST(test_encode_decode); - RUN_TEST(test_matching_access); - - pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total); - if (passed != total) - panic("KCSAN selftests failed"); - return 0; -} -postcore_initcall(kcsan_selftest); -- cgit v1.2.3 From 7e766560e6e2c1cf2782f00e63c31564e4c9f0fe Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 16 Jun 2020 14:36:24 +0200 Subject: kcsan: Remove existing special atomic rules Remove existing special atomic rules from kcsan_is_atomic_special() because they are no longer needed. Since we rely on the compiler emitting instrumentation distinguishing volatile accesses, the rules have become redundant. Let's keep kcsan_is_atomic_special() around, so that we have an obvious place to add special rules should the need arise in future. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/atomic.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h index be9e625227f3..75fe701f4127 100644 --- a/kernel/kcsan/atomic.h +++ b/kernel/kcsan/atomic.h @@ -3,8 +3,7 @@ #ifndef _KERNEL_KCSAN_ATOMIC_H #define _KERNEL_KCSAN_ATOMIC_H -#include -#include +#include /* * Special rules for certain memory where concurrent conflicting accesses are @@ -13,8 +12,7 @@ */ static bool kcsan_is_atomic_special(const volatile void *ptr) { - /* volatile globals that have been observed in data races. */ - return ptr == &jiffies || ptr == ¤t->state; + return false; } #endif /* _KERNEL_KCSAN_ATOMIC_H */ -- cgit v1.2.3 From 56b031f0abf55254d47a329010574733fa9a27b8 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 16 Jun 2020 14:36:25 +0200 Subject: kcsan: Add jiffies test to test suite Add a test that KCSAN nor the compiler gets confused about accesses to jiffies on different architectures. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/kcsan-test.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'kernel') diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c index 3af420ad6ee7..fed6fcb5768c 100644 --- a/kernel/kcsan/kcsan-test.c +++ b/kernel/kcsan/kcsan-test.c @@ -366,6 +366,11 @@ static noinline void test_kernel_read_struct_zero_size(void) kcsan_check_read(&test_struct.val[3], 0); } +static noinline void test_kernel_jiffies_reader(void) +{ + sink_value((long)jiffies); +} + static noinline void test_kernel_seqlock_reader(void) { unsigned int seq; @@ -817,6 +822,23 @@ static void test_assert_exclusive_access_scoped(struct kunit *test) KUNIT_EXPECT_TRUE(test, match_expect_inscope); } +/* + * jiffies is special (declared to be volatile) and its accesses are typically + * not marked; this test ensures that the compiler nor KCSAN gets confused about + * jiffies's declaration on different architectures. + */ +__no_kcsan +static void test_jiffies_noreport(struct kunit *test) +{ + bool match_never = false; + + begin_test_checks(test_kernel_jiffies_reader, test_kernel_jiffies_reader); + do { + match_never = report_available(); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_FALSE(test, match_never); +} + /* Test that racing accesses in seqlock critical sections are not reported. */ __no_kcsan static void test_seqlock_noreport(struct kunit *test) @@ -867,6 +889,7 @@ static struct kunit_case kcsan_test_cases[] = { KCSAN_KUNIT_CASE(test_assert_exclusive_bits_nochange), KCSAN_KUNIT_CASE(test_assert_exclusive_writer_scoped), KCSAN_KUNIT_CASE(test_assert_exclusive_access_scoped), + KCSAN_KUNIT_CASE(test_jiffies_noreport), KCSAN_KUNIT_CASE(test_seqlock_noreport), {}, }; -- cgit v1.2.3 From 2839a232071f588d334543fb86f5689b43353842 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jun 2020 11:31:17 +0200 Subject: kcsan: Simplify compiler flags Simplify the set of compiler flags for the runtime by removing cc-option from -fno-stack-protector, because all supported compilers support it. This saves us one compiler invocation during build. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index 092ce58d2e56..fea064afc4f7 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -7,8 +7,8 @@ CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) -CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \ - $(call cc-option,-fno-stack-protector,) +CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \ + -fno-stack-protector obj-y := core.o debugfs.o report.o obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o -- cgit v1.2.3 From 61d56d7aa5eca3b909bce51ba8125b0fa44d7e17 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jun 2020 11:31:18 +0200 Subject: kcsan: Disable branch tracing in core runtime Disable branch tracing in core KCSAN runtime if branches are being traced (TRACE_BRANCH_PROFILING). This it to avoid its performance impact, but also avoid recursion in case KCSAN is enabled for the branch tracing runtime. The latter had already been a problem for KASAN: https://lore.kernel.org/lkml/CANpmjNOeXmD5E3O50Z3MjkiuCYaYOPyi+1rq=GZvEKwBvLR0Ug@mail.gmail.com/ Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index fea064afc4f7..65ca5539c470 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -8,7 +8,7 @@ CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \ - -fno-stack-protector + -fno-stack-protector -DDISABLE_BRANCH_PROFILING obj-y := core.o debugfs.o report.o obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o -- cgit v1.2.3 From 248591f5d257a19c1cba9ab9da3536bfbc2f0149 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 24 Jun 2020 13:32:46 +0200 Subject: kcsan: Make KCSAN compatible with new IRQ state tracking The new IRQ state tracking code does not honor lockdep_off(), and as such we should again permit tracing by using non-raw functions in core.c. Update the lockdep_off() comment in report.c, to reflect the fact there is still a potential risk of deadlock due to using printk() from scheduler code. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200624113246.GA170324@elver.google.com --- kernel/kcsan/core.c | 5 ++--- kernel/kcsan/report.c | 9 +++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 15f67949d11e..732623c30359 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) } if (!kcsan_interrupt_watcher) - /* Use raw to avoid lockdep recursion via IRQ flags tracing. */ - raw_local_irq_save(irq_flags); + local_irq_save(irq_flags); watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); if (watchpoint == NULL) { @@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); out_unlock: if (!kcsan_interrupt_watcher) - raw_local_irq_restore(irq_flags); + local_irq_restore(irq_flags); out: user_access_restore(ua_flags); } diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ac5f8345bae9..6b2fb1a6d8cd 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -606,10 +606,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, goto out; /* - * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if - * we do not turn off lockdep here; this could happen due to recursion - * into lockdep via KCSAN if we detect a race in utilities used by - * lockdep. + * Because we may generate reports when we're in scheduler code, the use + * of printk() could deadlock. Until such time that all printing code + * called in print_report() is scheduler-safe, accept the risk, and just + * get our message out. As such, also disable lockdep to hide the + * warning, and avoid disabling lockdep for the rest of the kernel. */ lockdep_off(); -- cgit v1.2.3 From 859d069ee1ddd87862e1d6a356a82ed417dbeb67 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2020 15:00:57 +0200 Subject: lockdep: Prepare for NMI IRQ state tracking There is no reason not to always, accurately, track IRQ state. This change also makes IRQ state tracking ignore lockdep_off(). Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200623083721.155449112@infradead.org --- kernel/locking/lockdep.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 29a8de4c50b9..d595623c4b34 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -395,7 +395,7 @@ void lockdep_init_task(struct task_struct *task) static __always_inline void lockdep_recursion_finish(void) { - if (WARN_ON_ONCE(--current->lockdep_recursion)) + if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK)) current->lockdep_recursion = 0; } @@ -3646,7 +3646,16 @@ static void __trace_hardirqs_on_caller(void) */ void lockdep_hardirqs_on_prepare(unsigned long ip) { - if (unlikely(!debug_locks || current->lockdep_recursion)) + if (unlikely(!debug_locks)) + return; + + /* + * NMIs do not (and cannot) track lock dependencies, nothing to do. + */ + if (unlikely(in_nmi())) + return; + + if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; if (unlikely(current->hardirqs_enabled)) { @@ -3692,7 +3701,27 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) { struct task_struct *curr = current; - if (unlikely(!debug_locks || curr->lockdep_recursion)) + if (unlikely(!debug_locks)) + return; + + /* + * NMIs can happen in the middle of local_irq_{en,dis}able() where the + * tracking state and hardware state are out of sync. + * + * NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from, + * and not rely on hardware state like normal interrupts. + */ + if (unlikely(in_nmi())) { + /* + * Skip: + * - recursion check, because NMI can hit lockdep; + * - hardware state check, because above; + * - chain_key check, see lockdep_hardirqs_on_prepare(). + */ + goto skip_checks; + } + + if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; if (curr->hardirqs_enabled) { @@ -3720,6 +3749,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key != current->curr_chain_key); +skip_checks: /* we'll do an OFF -> ON transition: */ curr->hardirqs_enabled = 1; curr->hardirq_enable_ip = ip; @@ -3735,7 +3765,15 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) { struct task_struct *curr = current; - if (unlikely(!debug_locks || curr->lockdep_recursion)) + if (unlikely(!debug_locks)) + return; + + /* + * Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep; + * they will restore the software state. This ensures the software + * state is consistent inside NMIs as well. + */ + if (unlikely(!in_nmi() && (current->lockdep_recursion & LOCKDEP_RECURSION_MASK))) return; /* -- cgit v1.2.3 From a21ee6055c30ce68c4e201c6496f0ed2a1936230 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2020 12:22:41 +0200 Subject: lockdep: Change hardirq{s_enabled,_context} to per-cpu variables Currently all IRQ-tracking state is in task_struct, this means that task_struct needs to be defined before we use it. Especially for lockdep_assert_irq*() this can lead to header-hell. Move the hardirq state into per-cpu variables to avoid the task_struct dependency. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200623083721.512673481@infradead.org --- include/linux/irqflags.h | 19 ++++++++++++------- include/linux/lockdep.h | 34 ++++++++++++++++++---------------- include/linux/sched.h | 2 -- kernel/fork.c | 4 +--- kernel/locking/lockdep.c | 30 +++++++++++++++--------------- kernel/softirq.c | 6 ++++++ 6 files changed, 52 insertions(+), 43 deletions(-) (limited to 'kernel') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 6384d2813ded..255444fe4609 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -14,6 +14,7 @@ #include #include +#include /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING @@ -31,18 +32,22 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS + +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); + extern void trace_hardirqs_on_prepare(void); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p) ((p)->hardirq_context) +# define lockdep_hardirq_context(p) (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) -# define lockdep_hardirq_enter() \ -do { \ - if (!current->hardirq_context++) \ - current->hardirq_threaded = 0; \ +# define lockdep_hardirq_enter() \ +do { \ + if (this_cpu_inc_return(hardirq_context) == 1) \ + current->hardirq_threaded = 0; \ } while (0) # define lockdep_hardirq_threaded() \ do { \ @@ -50,7 +55,7 @@ do { \ } while (0) # define lockdep_hardirq_exit() \ do { \ - current->hardirq_context--; \ + this_cpu_dec(hardirq_context); \ } while (0) # define lockdep_softirq_enter() \ do { \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 3b73cf84f77d..be6cb17a8879 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -11,6 +11,7 @@ #define __LINUX_LOCKDEP_H #include +#include struct task_struct; @@ -529,28 +530,29 @@ do { \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) -#define lockdep_assert_irqs_enabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirqs_enabled, \ - "IRQs not enabled as expected\n"); \ - } while (0) +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); -#define lockdep_assert_irqs_disabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirqs_enabled, \ - "IRQs not disabled as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_enabled() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \ +} while (0) -#define lockdep_assert_in_irq() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirq_context, \ - "Not in hardirq as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_disabled() \ +do { \ + WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \ +} while (0) + +#define lockdep_assert_in_irq() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ +} while (0) #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define might_lock_nested(lock, subclass) do { } while (0) + # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) @@ -560,7 +562,7 @@ do { \ # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirq_context && \ + lockdep_hardirq_context(current) && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 692e327d7455..3903a9500926 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -990,8 +990,6 @@ struct task_struct { unsigned long hardirq_disable_ip; unsigned int hardirq_enable_event; unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; u64 hardirq_chain_key; unsigned long softirq_disable_ip; unsigned long softirq_enable_ip; diff --git a/kernel/fork.c b/kernel/fork.c index efc5493203ae..70d9d0a4de2a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1954,8 +1954,8 @@ static __latent_entropy struct task_struct *copy_process( rt_mutex_init_task(p); + lockdep_assert_irqs_enabled(); #ifdef CONFIG_PROVE_LOCKING - DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; @@ -2036,7 +2036,6 @@ static __latent_entropy struct task_struct *copy_process( #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; - p->hardirqs_enabled = 0; p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; @@ -2046,7 +2045,6 @@ static __latent_entropy struct task_struct *copy_process( p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; - p->hardirq_context = 0; p->softirq_context = 0; #endif diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d595623c4b34..ab4ffbe0e9e9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr, pr_warn("-----------------------------------------------------\n"); pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), - curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, - curr->hardirqs_enabled, + lockdep_hardirqs_enabled(curr), curr->softirqs_enabled); print_lock(next); @@ -3658,7 +3658,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (unlikely(current->hardirqs_enabled)) { + if (unlikely(lockdep_hardirqs_enabled(current))) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3686,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) * Can't allow enabling interrupts while in an interrupt handler, * that's general bad form and such. Recursion, limited stack etc.. */ - if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current))) return; current->hardirq_chain_key = current->curr_chain_key; @@ -3724,7 +3724,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (curr->hardirqs_enabled) { + if (lockdep_hardirqs_enabled(curr)) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3751,7 +3751,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) skip_checks: /* we'll do an OFF -> ON transition: */ - curr->hardirqs_enabled = 1; + this_cpu_write(hardirqs_enabled, 1); curr->hardirq_enable_ip = ip; curr->hardirq_enable_event = ++curr->irq_events; debug_atomic_inc(hardirqs_on_events); @@ -3783,11 +3783,11 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; - if (curr->hardirqs_enabled) { + if (lockdep_hardirqs_enabled(curr)) { /* * We have done an ON -> OFF transition: */ - curr->hardirqs_enabled = 0; + this_cpu_write(hardirqs_enabled, 0); curr->hardirq_disable_ip = ip; curr->hardirq_disable_event = ++curr->irq_events; debug_atomic_inc(hardirqs_off_events); @@ -3832,7 +3832,7 @@ void lockdep_softirqs_on(unsigned long ip) * usage bit for all held locks, if hardirqs are * enabled too: */ - if (curr->hardirqs_enabled) + if (lockdep_hardirqs_enabled(curr)) mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ); lockdep_recursion_finish(); } @@ -3881,7 +3881,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) */ if (!hlock->trylock) { if (hlock->read) { - if (curr->hardirq_context) + if (lockdep_hardirq_context(curr)) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ_READ)) return 0; @@ -3890,7 +3890,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) LOCK_USED_IN_SOFTIRQ_READ)) return 0; } else { - if (curr->hardirq_context) + if (lockdep_hardirq_context(curr)) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) return 0; if (curr->softirq_context) @@ -3928,7 +3928,7 @@ lock_used: static inline unsigned int task_irq_context(struct task_struct *task) { - return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context + + return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context(task) + LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context; } @@ -4021,7 +4021,7 @@ static inline short task_wait_context(struct task_struct *curr) * Set appropriate wait type for the context; for IRQs we have to take * into account force_irqthread as that is implied by PREEMPT_RT. */ - if (curr->hardirq_context) { + if (lockdep_hardirq_context(curr)) { /* * Check if force_irqthreads will run us threaded. */ @@ -4864,11 +4864,11 @@ static void check_flags(unsigned long flags) return; if (irqs_disabled_flags(flags)) { - if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) { + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled(current))) { printk("possible reason: unannotated irqs-off.\n"); } } else { - if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) { + if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled(current))) { printk("possible reason: unannotated irqs-on.\n"); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index c4201b7f42b1..342c53feaa7a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -107,6 +107,12 @@ static bool ksoftirqd_running(unsigned long pending) * where hardirqs are disabled legitimately: */ #ifdef CONFIG_TRACE_IRQFLAGS + +DEFINE_PER_CPU(int, hardirqs_enabled); +DEFINE_PER_CPU(int, hardirq_context); +EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled); +EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); + void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; -- cgit v1.2.3 From f9ad4a5f3f20bee022b1bdde94e5ece6dc0b0edc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2020 13:03:26 +0200 Subject: lockdep: Remove lockdep_hardirq{s_enabled,_context}() argument Now that the macros use per-cpu data, we no longer need the argument. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200623083721.571835311@infradead.org --- arch/x86/entry/common.c | 2 +- include/linux/irqflags.h | 8 ++++---- include/linux/lockdep.h | 2 +- kernel/locking/lockdep.c | 30 +++++++++++++++--------------- kernel/softirq.c | 2 +- tools/include/linux/irqflags.h | 4 ++-- 6 files changed, 24 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 63c607dd6c52..4ea640363f5d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -758,7 +758,7 @@ noinstr void idtentry_exit_user(struct pt_regs *regs) noinstr bool idtentry_enter_nmi(struct pt_regs *regs) { - bool irq_state = lockdep_hardirqs_enabled(current); + bool irq_state = lockdep_hardirqs_enabled(); __nmi_enter(); lockdep_hardirqs_off(CALLER_ADDR0); diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 255444fe4609..5811ee8a5cd8 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -40,9 +40,9 @@ DECLARE_PER_CPU(int, hardirq_context); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p) (this_cpu_read(hardirq_context)) +# define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled)) +# define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) # define lockdep_hardirq_enter() \ do { \ @@ -109,9 +109,9 @@ do { \ # define trace_hardirqs_off_finish() do { } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) -# define lockdep_hardirq_context(p) 0 +# define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_hardirqs_enabled() 0 # define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_threaded() do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index be6cb17a8879..fd04b9e96091 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -562,7 +562,7 @@ do { \ # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - lockdep_hardirq_context(current) && \ + lockdep_hardirq_context() && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ab4ffbe0e9e9..c9ea05edce25 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr, pr_warn("-----------------------------------------------------\n"); pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), - lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT, curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, - lockdep_hardirqs_enabled(curr), + lockdep_hardirqs_enabled(), curr->softirqs_enabled); print_lock(next); @@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", curr->comm, task_pid_nr(curr), - lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT, lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, - lockdep_hardirqs_enabled(curr), + lockdep_hardirqs_enabled(), lockdep_softirqs_enabled(curr)); print_lock(this); @@ -3658,7 +3658,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (unlikely(lockdep_hardirqs_enabled(current))) { + if (unlikely(lockdep_hardirqs_enabled())) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3686,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) * Can't allow enabling interrupts while in an interrupt handler, * that's general bad form and such. Recursion, limited stack etc.. */ - if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current))) + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context())) return; current->hardirq_chain_key = current->curr_chain_key; @@ -3724,7 +3724,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (lockdep_hardirqs_enabled(curr)) { + if (lockdep_hardirqs_enabled()) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3783,7 +3783,7 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; - if (lockdep_hardirqs_enabled(curr)) { + if (lockdep_hardirqs_enabled()) { /* * We have done an ON -> OFF transition: */ @@ -3832,7 +3832,7 @@ void lockdep_softirqs_on(unsigned long ip) * usage bit for all held locks, if hardirqs are * enabled too: */ - if (lockdep_hardirqs_enabled(curr)) + if (lockdep_hardirqs_enabled()) mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ); lockdep_recursion_finish(); } @@ -3881,7 +3881,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) */ if (!hlock->trylock) { if (hlock->read) { - if (lockdep_hardirq_context(curr)) + if (lockdep_hardirq_context()) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ_READ)) return 0; @@ -3890,7 +3890,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) LOCK_USED_IN_SOFTIRQ_READ)) return 0; } else { - if (lockdep_hardirq_context(curr)) + if (lockdep_hardirq_context()) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) return 0; if (curr->softirq_context) @@ -3928,7 +3928,7 @@ lock_used: static inline unsigned int task_irq_context(struct task_struct *task) { - return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context(task) + + return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context() + LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context; } @@ -4021,7 +4021,7 @@ static inline short task_wait_context(struct task_struct *curr) * Set appropriate wait type for the context; for IRQs we have to take * into account force_irqthread as that is implied by PREEMPT_RT. */ - if (lockdep_hardirq_context(curr)) { + if (lockdep_hardirq_context()) { /* * Check if force_irqthreads will run us threaded. */ @@ -4864,11 +4864,11 @@ static void check_flags(unsigned long flags) return; if (irqs_disabled_flags(flags)) { - if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled(current))) { + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) { printk("possible reason: unannotated irqs-off.\n"); } } else { - if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled(current))) { + if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled())) { printk("possible reason: unannotated irqs-on.\n"); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index 342c53feaa7a..5e9aaa648a74 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -230,7 +230,7 @@ static inline bool lockdep_softirq_start(void) { bool in_hardirq = false; - if (lockdep_hardirq_context(current)) { + if (lockdep_hardirq_context()) { in_hardirq = true; lockdep_hardirq_exit(); } diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h index 67e01bbadbfe..501262aee8ff 100644 --- a/tools/include/linux/irqflags.h +++ b/tools/include/linux/irqflags.h @@ -2,9 +2,9 @@ #ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ #define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -# define lockdep_hardirq_context(p) 0 +# define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_hardirqs_enabled() 0 # define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_exit() do { } while (0) -- cgit v1.2.3 From 9180bd467f9abdb44afde650d07e3b9dd66d837c Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 2 Jul 2020 17:28:40 -0300 Subject: futex: Remove put_futex_key() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 4b39f99c ("futex: Remove {get,drop}_futex_key_refs()"), put_futex_key() is empty. Remove all references for this function and the then redundant labels. Signed-off-by: André Almeida Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200702202843.520764-2-andrealmeid@collabora.com --- kernel/futex.c | 61 ++++++++++++---------------------------------------------- 1 file changed, 12 insertions(+), 49 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index e646661f6282..bd9adfca5d51 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -677,10 +677,6 @@ out: return err; } -static inline void put_futex_key(union futex_key *key) -{ -} - /** * fault_in_user_writeable() - Fault in user address and verify RW access * @uaddr: pointer to faulting user space address @@ -1617,7 +1613,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) /* Make sure we really have tasks to wakeup */ if (!hb_waiters_pending(hb)) - goto out_put_key; + goto out; spin_lock(&hb->lock); @@ -1640,8 +1636,6 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) spin_unlock(&hb->lock); wake_up_q(&wake_q); -out_put_key: - put_futex_key(&key); out: return ret; } @@ -1712,7 +1706,7 @@ retry: goto out; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) - goto out_put_key1; + goto out; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -1730,13 +1724,13 @@ retry_private: * an MMU, but we might get them from range checking */ ret = op_ret; - goto out_put_keys; + goto out; } if (op_ret == -EFAULT) { ret = fault_in_user_writeable(uaddr2); if (ret) - goto out_put_keys; + goto out; } if (!(flags & FLAGS_SHARED)) { @@ -1744,8 +1738,6 @@ retry_private: goto retry_private; } - put_futex_key(&key2); - put_futex_key(&key1); cond_resched(); goto retry; } @@ -1781,10 +1773,6 @@ retry_private: out_unlock: double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); -out_put_keys: - put_futex_key(&key2); -out_put_key1: - put_futex_key(&key1); out: return ret; } @@ -1996,7 +1984,7 @@ retry: ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, requeue_pi ? FUTEX_WRITE : FUTEX_READ); if (unlikely(ret != 0)) - goto out_put_key1; + goto out; /* * The check above which compares uaddrs is not sufficient for @@ -2004,7 +1992,7 @@ retry: */ if (requeue_pi && match_futex(&key1, &key2)) { ret = -EINVAL; - goto out_put_keys; + goto out; } hb1 = hash_futex(&key1); @@ -2025,13 +2013,11 @@ retry_private: ret = get_user(curval, uaddr1); if (ret) - goto out_put_keys; + goto out; if (!(flags & FLAGS_SHARED)) goto retry_private; - put_futex_key(&key2); - put_futex_key(&key1); goto retry; } if (curval != *cmpval) { @@ -2090,8 +2076,6 @@ retry_private: case -EFAULT: double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); - put_futex_key(&key2); - put_futex_key(&key1); ret = fault_in_user_writeable(uaddr2); if (!ret) goto retry; @@ -2106,8 +2090,6 @@ retry_private: */ double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); - put_futex_key(&key2); - put_futex_key(&key1); /* * Handle the case where the owner is in the middle of * exiting. Wait for the exit to complete otherwise @@ -2217,10 +2199,6 @@ out_unlock: wake_up_q(&wake_q); hb_waiters_dec(hb2); -out_put_keys: - put_futex_key(&key2); -out_put_key1: - put_futex_key(&key1); out: return ret ? ret : task_count; } @@ -2697,7 +2675,6 @@ retry_private: if (!(flags & FLAGS_SHARED)) goto retry_private; - put_futex_key(&q->key); goto retry; } @@ -2707,8 +2684,6 @@ retry_private: } out: - if (ret) - put_futex_key(&q->key); return ret; } @@ -2853,7 +2828,6 @@ retry_private: * - EAGAIN: The user space value changed. */ queue_unlock(hb); - put_futex_key(&q.key); /* * Handle the case where the owner is in the middle of * exiting. Wait for the exit to complete otherwise @@ -2961,13 +2935,11 @@ no_block: put_pi_state(pi_state); } - goto out_put_key; + goto out; out_unlock_put_key: queue_unlock(hb); -out_put_key: - put_futex_key(&q.key); out: if (to) { hrtimer_cancel(&to->timer); @@ -2980,12 +2952,11 @@ uaddr_faulted: ret = fault_in_user_writeable(uaddr); if (ret) - goto out_put_key; + goto out; if (!(flags & FLAGS_SHARED)) goto retry_private; - put_futex_key(&q.key); goto retry; } @@ -3114,16 +3085,13 @@ retry: out_unlock: spin_unlock(&hb->lock); out_putkey: - put_futex_key(&key); return ret; pi_retry: - put_futex_key(&key); cond_resched(); goto retry; pi_faulted: - put_futex_key(&key); ret = fault_in_user_writeable(uaddr); if (!ret) @@ -3265,7 +3233,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) - goto out_key2; + goto out; /* * The check above which compares uaddrs is not sufficient for @@ -3274,7 +3242,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (match_futex(&q.key, &key2)) { queue_unlock(hb); ret = -EINVAL; - goto out_put_keys; + goto out; } /* Queue the futex_q, drop the hb lock, wait for wakeup. */ @@ -3284,7 +3252,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); spin_unlock(&hb->lock); if (ret) - goto out_put_keys; + goto out; /* * In order for us to be here, we know our q.key == key2, and since @@ -3374,11 +3342,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ret = -EWOULDBLOCK; } -out_put_keys: - put_futex_key(&q.key); -out_key2: - put_futex_key(&key2); - out: if (to) { hrtimer_cancel(&to->timer); -- cgit v1.2.3 From d7c5ed73b19c4640426d9c106f70ec2cb532034d Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 2 Jul 2020 17:28:41 -0300 Subject: futex: Remove needless goto's MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As stated in the coding style documentation, "if there is no cleanup needed then just return directly", instead of jumping to a label and then returning. Remove such goto's and replace with a return statement. When there's a ternary operator on the return value, replace it with the result of the operation when it is logically possible to determine it by the control flow. Signed-off-by: André Almeida Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200702202843.520764-3-andrealmeid@collabora.com --- kernel/futex.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index bd9adfca5d51..362fbca6d614 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1607,13 +1607,13 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ); if (unlikely(ret != 0)) - goto out; + return ret; hb = hash_futex(&key); /* Make sure we really have tasks to wakeup */ if (!hb_waiters_pending(hb)) - goto out; + return ret; spin_lock(&hb->lock); @@ -1636,7 +1636,6 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) spin_unlock(&hb->lock); wake_up_q(&wake_q); -out: return ret; } @@ -1703,10 +1702,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, retry: ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); if (unlikely(ret != 0)) - goto out; + return ret; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) - goto out; + return ret; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -1724,13 +1723,13 @@ retry_private: * an MMU, but we might get them from range checking */ ret = op_ret; - goto out; + return ret; } if (op_ret == -EFAULT) { ret = fault_in_user_writeable(uaddr2); if (ret) - goto out; + return ret; } if (!(flags & FLAGS_SHARED)) { @@ -1773,7 +1772,6 @@ retry_private: out_unlock: double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); -out: return ret; } @@ -1980,20 +1978,18 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, retry: ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); if (unlikely(ret != 0)) - goto out; + return ret; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, requeue_pi ? FUTEX_WRITE : FUTEX_READ); if (unlikely(ret != 0)) - goto out; + return ret; /* * The check above which compares uaddrs is not sufficient for * shared futexes. We need to compare the keys: */ - if (requeue_pi && match_futex(&key1, &key2)) { - ret = -EINVAL; - goto out; - } + if (requeue_pi && match_futex(&key1, &key2)) + return -EINVAL; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -2013,7 +2009,7 @@ retry_private: ret = get_user(curval, uaddr1); if (ret) - goto out; + return ret; if (!(flags & FLAGS_SHARED)) goto retry_private; @@ -2079,7 +2075,7 @@ retry_private: ret = fault_in_user_writeable(uaddr2); if (!ret) goto retry; - goto out; + return ret; case -EBUSY: case -EAGAIN: /* @@ -2198,8 +2194,6 @@ out_unlock: double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); hb_waiters_dec(hb2); - -out: return ret ? ret : task_count; } @@ -2545,7 +2539,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) */ if (q->pi_state->owner != current) ret = fixup_pi_state_owner(uaddr, q, current); - goto out; + return ret ? ret : locked; } /* @@ -2558,7 +2552,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) */ if (q->pi_state->owner == current) { ret = fixup_pi_state_owner(uaddr, q, NULL); - goto out; + return ret; } /* @@ -2572,8 +2566,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) q->pi_state->owner); } -out: - return ret ? ret : locked; + return ret; } /** @@ -2670,7 +2663,7 @@ retry_private: ret = get_user(uval, uaddr); if (ret) - goto out; + return ret; if (!(flags & FLAGS_SHARED)) goto retry_private; @@ -2683,7 +2676,6 @@ retry_private: ret = -EWOULDBLOCK; } -out: return ret; } -- cgit v1.2.3 From 9261308598ad28b9a8a2237d881833e9f217244e Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 2 Jul 2020 17:28:43 -0300 Subject: futex: Consistently use fshared as boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since fshared is only conveying true/false values, declare it as bool. In get_futex_key() the usage of fshared can be restricted to the first part of the function. If fshared is false the function is terminated early and the subsequent code can use a constant 'true' instead of the variable. Signed-off-by: André Almeida Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200702202843.520764-5-andrealmeid@collabora.com --- kernel/futex.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 362fbca6d614..cda91755b77d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -476,7 +476,7 @@ static u64 get_inode_sequence_number(struct inode *inode) /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex - * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED + * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED * @key: address where result is stored. * @rw: mapping needs to be read/write (values: FUTEX_READ, * FUTEX_WRITE) @@ -500,8 +500,8 @@ static u64 get_inode_sequence_number(struct inode *inode) * * lock_page() might sleep, the caller should not hold a spinlock. */ -static int -get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw) +static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, + enum futex_access rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; @@ -538,7 +538,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a again: /* Ignore any VERIFY_READ mapping (futex common case) */ - if (unlikely(should_fail_futex(fshared))) + if (unlikely(should_fail_futex(true))) return -EFAULT; err = get_user_pages_fast(address, 1, FOLL_WRITE, &page); @@ -626,7 +626,7 @@ again: * A RO anonymous page will never change and thus doesn't make * sense for futex operations. */ - if (unlikely(should_fail_futex(fshared)) || ro) { + if (unlikely(should_fail_futex(true)) || ro) { err = -EFAULT; goto out; } -- cgit v1.2.3 From 9a71df495c3d29dab596bb590e73fd8b20106e2d Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 2 Jul 2020 17:28:42 -0300 Subject: futex: Remove unused or redundant includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 82af7aca ("Removal of FUTEX_FD"), some includes related to file operations aren't needed anymore. More investigation around the includes showed that a lot of includes aren't required for compilation, possible due to redundant includes. Simplify the code by removing unused includes. Signed-off-by: André Almeida Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200702202843.520764-4-andrealmeid@collabora.com --- kernel/futex.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index cda91755b77d..4616d4ad609d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -32,30 +32,13 @@ * "But they come in a choice of three flavours!" */ #include -#include -#include -#include -#include #include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include #include -#include #include -- cgit v1.2.3 From ed00495333ccc80fc8fb86fb43773c3c2a499466 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Mon, 27 Jul 2020 14:48:52 +0200 Subject: locking/lockdep: Fix TRACE_IRQFLAGS vs. NMIs Prior to commit: 859d069ee1dd ("lockdep: Prepare for NMI IRQ state tracking") IRQ state tracking was disabled in NMIs due to nmi_enter() doing lockdep_off() -- with the obvious requirement that NMI entry call nmi_enter() before trace_hardirqs_off(). [ AFAICT, PowerPC and SH violate this order on their NMI entry ] However, that commit explicitly changed lockdep_hardirqs_*() to ignore lockdep_off() and breaks every architecture that has irq-tracing in it's NMI entry that hasn't been fixed up (x86 being the only fixed one at this point). The reason for this change is that by ignoring lockdep_off() we can: - get rid of 'current->lockdep_recursion' in lockdep_assert_irqs*() which was going to to give header-recursion issues with the seqlock rework. - allow these lockdep_assert_*() macros to function in NMI context. Restore the previous state of things and allow an architecture to opt-in to the NMI IRQ tracking support, however instead of relying on lockdep_off(), rely on in_nmi(), both are part of nmi_enter() and so over-all entry ordering doesn't need to change. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200727124852.GK119549@hirez.programming.kicks-ass.net --- arch/x86/Kconfig.debug | 3 +++ kernel/locking/lockdep.c | 8 +++++++- lib/Kconfig.debug | 6 ++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 0dd319e6e5b4..ee1d3c5834c6 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -3,6 +3,9 @@ config TRACE_IRQFLAGS_SUPPORT def_bool y +config TRACE_IRQFLAGS_NMI_SUPPORT + def_bool y + config EARLY_PRINTK_USB bool diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d595623c4b34..8b0b28b4546b 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3712,6 +3712,9 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) * and not rely on hardware state like normal interrupts. */ if (unlikely(in_nmi())) { + if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI)) + return; + /* * Skip: * - recursion check, because NMI can hit lockdep; @@ -3773,7 +3776,10 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) * they will restore the software state. This ensures the software * state is consistent inside NMIs as well. */ - if (unlikely(!in_nmi() && (current->lockdep_recursion & LOCKDEP_RECURSION_MASK))) + if (in_nmi()) { + if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI)) + return; + } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK) return; /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ad9210d70a1..fa964b51f066 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1325,11 +1325,17 @@ config WW_MUTEX_SELFTEST endmenu # lock debugging config TRACE_IRQFLAGS + depends on TRACE_IRQFLAGS_SUPPORT bool help Enables hooks to interrupt enabling and disabling for either tracing or lock debugging. +config TRACE_IRQFLAGS_NMI + def_bool y + depends on TRACE_IRQFLAGS + depends on TRACE_IRQFLAGS_NMI_SUPPORT + config STACKTRACE bool "Stack backtrace support" depends on STACKTRACE_SUPPORT -- cgit v1.2.3 From 0584df9c12f449124d0bfef9899e5365604ee7a9 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 29 Jul 2020 13:09:15 +0200 Subject: lockdep: Refactor IRQ trace events fields into struct Refactor the IRQ trace events fields, used for printing information about the IRQ trace events, into a separate struct 'irqtrace_events'. This improves readability by separating the information only used in reporting, as well as enables (simplified) storing/restoring of irqtrace_events snapshots. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200729110916.3920464-1-elver@google.com Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 13 +++++++++++ include/linux/sched.h | 11 ++------- kernel/fork.c | 16 +++++-------- kernel/locking/lockdep.c | 58 +++++++++++++++++++++++++----------------------- 4 files changed, 50 insertions(+), 48 deletions(-) (limited to 'kernel') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5811ee8a5cd8..bd5c55755447 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -33,6 +33,19 @@ #ifdef CONFIG_TRACE_IRQFLAGS +/* Per-task IRQ trace events information. */ +struct irqtrace_events { + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; +}; + DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d1de021b315..52e0fdd6a555 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -980,17 +981,9 @@ struct task_struct { #endif #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; + struct irqtrace_events irqtrace; unsigned int hardirq_threaded; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; u64 hardirq_chain_key; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; int softirqs_enabled; int softirq_context; int irq_config; diff --git a/kernel/fork.c b/kernel/fork.c index 70d9d0a4de2a..56a640799680 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2035,17 +2035,11 @@ static __latent_entropy struct task_struct *copy_process( seqcount_init(&p->mems_allowed_seq); #endif #ifdef CONFIG_TRACE_IRQFLAGS - p->irq_events = 0; - p->hardirq_enable_ip = 0; - p->hardirq_enable_event = 0; - p->hardirq_disable_ip = _THIS_IP_; - p->hardirq_disable_event = 0; - p->softirqs_enabled = 1; - p->softirq_enable_ip = _THIS_IP_; - p->softirq_enable_event = 0; - p->softirq_disable_ip = 0; - p->softirq_disable_event = 0; - p->softirq_context = 0; + memset(&p->irqtrace, 0, sizeof(p->irqtrace)); + p->irqtrace.hardirq_disable_ip = _THIS_IP_; + p->irqtrace.softirq_enable_ip = _THIS_IP_; + p->softirqs_enabled = 1; + p->softirq_context = 0; #endif p->pagefault_disabled = 0; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c9ea05edce25..7b5800374c40 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3484,19 +3484,21 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, void print_irqtrace_events(struct task_struct *curr) { - printk("irq event stamp: %u\n", curr->irq_events); + const struct irqtrace_events *trace = &curr->irqtrace; + + printk("irq event stamp: %u\n", trace->irq_events); printk("hardirqs last enabled at (%u): [<%px>] %pS\n", - curr->hardirq_enable_event, (void *)curr->hardirq_enable_ip, - (void *)curr->hardirq_enable_ip); + trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip, + (void *)trace->hardirq_enable_ip); printk("hardirqs last disabled at (%u): [<%px>] %pS\n", - curr->hardirq_disable_event, (void *)curr->hardirq_disable_ip, - (void *)curr->hardirq_disable_ip); + trace->hardirq_disable_event, (void *)trace->hardirq_disable_ip, + (void *)trace->hardirq_disable_ip); printk("softirqs last enabled at (%u): [<%px>] %pS\n", - curr->softirq_enable_event, (void *)curr->softirq_enable_ip, - (void *)curr->softirq_enable_ip); + trace->softirq_enable_event, (void *)trace->softirq_enable_ip, + (void *)trace->softirq_enable_ip); printk("softirqs last disabled at (%u): [<%px>] %pS\n", - curr->softirq_disable_event, (void *)curr->softirq_disable_ip, - (void *)curr->softirq_disable_ip); + trace->softirq_disable_event, (void *)trace->softirq_disable_ip, + (void *)trace->softirq_disable_ip); } static int HARDIRQ_verbose(struct lock_class *class) @@ -3699,7 +3701,7 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_on_prepare); void noinstr lockdep_hardirqs_on(unsigned long ip) { - struct task_struct *curr = current; + struct irqtrace_events *trace = ¤t->irqtrace; if (unlikely(!debug_locks)) return; @@ -3752,8 +3754,8 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) skip_checks: /* we'll do an OFF -> ON transition: */ this_cpu_write(hardirqs_enabled, 1); - curr->hardirq_enable_ip = ip; - curr->hardirq_enable_event = ++curr->irq_events; + trace->hardirq_enable_ip = ip; + trace->hardirq_enable_event = ++trace->irq_events; debug_atomic_inc(hardirqs_on_events); } EXPORT_SYMBOL_GPL(lockdep_hardirqs_on); @@ -3763,8 +3765,6 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_on); */ void noinstr lockdep_hardirqs_off(unsigned long ip) { - struct task_struct *curr = current; - if (unlikely(!debug_locks)) return; @@ -3784,12 +3784,14 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) return; if (lockdep_hardirqs_enabled()) { + struct irqtrace_events *trace = ¤t->irqtrace; + /* * We have done an ON -> OFF transition: */ this_cpu_write(hardirqs_enabled, 0); - curr->hardirq_disable_ip = ip; - curr->hardirq_disable_event = ++curr->irq_events; + trace->hardirq_disable_ip = ip; + trace->hardirq_disable_event = ++trace->irq_events; debug_atomic_inc(hardirqs_off_events); } else { debug_atomic_inc(redundant_hardirqs_off); @@ -3802,7 +3804,7 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_off); */ void lockdep_softirqs_on(unsigned long ip) { - struct task_struct *curr = current; + struct irqtrace_events *trace = ¤t->irqtrace; if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -3814,7 +3816,7 @@ void lockdep_softirqs_on(unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; - if (curr->softirqs_enabled) { + if (current->softirqs_enabled) { debug_atomic_inc(redundant_softirqs_on); return; } @@ -3823,9 +3825,9 @@ void lockdep_softirqs_on(unsigned long ip) /* * We'll do an OFF -> ON transition: */ - curr->softirqs_enabled = 1; - curr->softirq_enable_ip = ip; - curr->softirq_enable_event = ++curr->irq_events; + current->softirqs_enabled = 1; + trace->softirq_enable_ip = ip; + trace->softirq_enable_event = ++trace->irq_events; debug_atomic_inc(softirqs_on_events); /* * We are going to turn softirqs on, so set the @@ -3833,7 +3835,7 @@ void lockdep_softirqs_on(unsigned long ip) * enabled too: */ if (lockdep_hardirqs_enabled()) - mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ); + mark_held_locks(current, LOCK_ENABLED_SOFTIRQ); lockdep_recursion_finish(); } @@ -3842,8 +3844,6 @@ void lockdep_softirqs_on(unsigned long ip) */ void lockdep_softirqs_off(unsigned long ip) { - struct task_struct *curr = current; - if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -3853,13 +3853,15 @@ void lockdep_softirqs_off(unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; - if (curr->softirqs_enabled) { + if (current->softirqs_enabled) { + struct irqtrace_events *trace = ¤t->irqtrace; + /* * We have done an ON -> OFF transition: */ - curr->softirqs_enabled = 0; - curr->softirq_disable_ip = ip; - curr->softirq_disable_event = ++curr->irq_events; + current->softirqs_enabled = 0; + trace->softirq_disable_ip = ip; + trace->softirq_disable_event = ++trace->irq_events; debug_atomic_inc(softirqs_off_events); /* * Whoops, we wanted softirqs off, so why aren't they? -- cgit v1.2.3 From 92c209ac6d3d35783c16c8a717547183e6e11162 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 29 Jul 2020 13:09:16 +0200 Subject: kcsan: Improve IRQ state trace reporting To improve the general usefulness of the IRQ state trace events with KCSAN enabled, save and restore the trace information when entering and exiting the KCSAN runtime as well as when generating a KCSAN report. Without this, reporting the IRQ trace events (whether via a KCSAN report or outside of KCSAN via a lockdep report) is rather useless due to continuously being touched by KCSAN. This is because if KCSAN is enabled, every instrumented memory access causes changes to IRQ trace events (either by KCSAN disabling/enabling interrupts or taking report_lock when generating a report). Before "lockdep: Prepare for NMI IRQ state tracking", KCSAN avoided touching the IRQ trace events via raw_local_irq_save/restore() and lockdep_off/on(). Fixes: 248591f5d257 ("kcsan: Make KCSAN compatible with new IRQ state tracking") Signed-off-by: Marco Elver Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200729110916.3920464-2-elver@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/kcsan/core.c | 23 +++++++++++++++++++++++ kernel/kcsan/kcsan.h | 7 +++++++ kernel/kcsan/report.c | 3 +++ 4 files changed, 37 insertions(+) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 52e0fdd6a555..060e9214c8b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1184,8 +1184,12 @@ struct task_struct { #ifdef CONFIG_KASAN unsigned int kasan_depth; #endif + #ifdef CONFIG_KCSAN struct kcsan_ctx kcsan_ctx; +#ifdef CONFIG_TRACE_IRQFLAGS + struct irqtrace_events kcsan_save_irqtrace; +#endif #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 732623c30359..0fe068192781 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -291,6 +291,20 @@ static inline unsigned int get_delay(void) 0); } +void kcsan_save_irqtrace(struct task_struct *task) +{ +#ifdef CONFIG_TRACE_IRQFLAGS + task->kcsan_save_irqtrace = task->irqtrace; +#endif +} + +void kcsan_restore_irqtrace(struct task_struct *task) +{ +#ifdef CONFIG_TRACE_IRQFLAGS + task->irqtrace = task->kcsan_save_irqtrace; +#endif +} + /* * Pull everything together: check_access() below contains the performance * critical operations; the fast-path (including check_access) functions should @@ -336,9 +350,11 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, flags = user_access_save(); if (consumed) { + kcsan_save_irqtrace(current); kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE, KCSAN_REPORT_CONSUMED_WATCHPOINT, watchpoint - watchpoints); + kcsan_restore_irqtrace(current); } else { /* * The other thread may not print any diagnostics, as it has @@ -396,6 +412,12 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) goto out; } + /* + * Save and restore the IRQ state trace touched by KCSAN, since KCSAN's + * runtime is entered for every memory access, and potentially useful + * information is lost if dirtied by KCSAN. + */ + kcsan_save_irqtrace(current); if (!kcsan_interrupt_watcher) local_irq_save(irq_flags); @@ -539,6 +561,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) out_unlock: if (!kcsan_interrupt_watcher) local_irq_restore(irq_flags); + kcsan_restore_irqtrace(current); out: user_access_restore(ua_flags); } diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 763d6d08d94b..29480010dc30 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -9,6 +9,7 @@ #define _KERNEL_KCSAN_KCSAN_H #include +#include /* The number of adjacent watchpoints to check. */ #define KCSAN_CHECK_ADJACENT 1 @@ -22,6 +23,12 @@ extern unsigned int kcsan_udelay_interrupt; */ extern bool kcsan_enabled; +/* + * Save/restore IRQ flags state trace dirtied by KCSAN. + */ +void kcsan_save_irqtrace(struct task_struct *task); +void kcsan_restore_irqtrace(struct task_struct *task); + /* * Initialize debugfs file. */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 6b2fb1a6d8cd..9d07e175de0f 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -308,6 +308,9 @@ static void print_verbose_info(struct task_struct *task) if (!task) return; + /* Restore IRQ state trace for printing. */ + kcsan_restore_irqtrace(task); + pr_err("\n"); debug_show_held_locks(task); print_irqtrace_events(task); -- cgit v1.2.3