From 2bc08847f5f95dbd10359c01e51d122e2e5488b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Apr 2019 10:19:52 +0100 Subject: lockdep: Swap storage for pin_count and references As a lockmap takes a reference for every ww_mutex used together, this can be an arbitrarily large number and under control of userspace -- easily overflowing the arbitrary limit of 4096. However, the pin_count (used for detecting unexpected lock dropping) is a full 32b despite nesting being extremely rare (see lockdep_pin_lock). Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190425092004.9995-33-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- include/linux/lockdep.h | 4 ++-- kernel/locking/lockdep.c | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105..9023ee07bf6a 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -138,8 +138,8 @@ struct held_lock { unsigned int read:2; /* see lock_acquire() comment */ unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; - unsigned int references:12; /* 32 bits */ - unsigned int pin_count; + unsigned int pin_count:12; /* 32 bits */ + unsigned int references; }; /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2270ec68f10a..dc03fe856729 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5402,11 +5402,14 @@ static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock) if (match_held_lock(hlock, lock)) { /* - * Grab 16bits of randomness; this is sufficient to not - * be guessable and still allows some pin nesting in - * our u32 pin_count. + * Grab 6bits of randomness; this is barely sufficient + * to not be guessable and still allows some 32 levels + * of pin nesting in our u12 pin_count. */ - cookie.val = 1 + (prandom_u32() >> 16); + cookie.val = 1 + (prandom_u32() >> 26); + if (DEBUG_LOCKS_WARN_ON(hlock->pin_count + cookie.val >= 1 << 12)) + return NIL_COOKIE; + hlock->pin_count += cookie.val; return cookie; } -- cgit v1.2.3 From 755132dd9c53de28673a463afd01a542712e663f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Nov 2017 12:57:06 +0000 Subject: ftrace: Allow configuring global trace buffer size (for dump-on-oops) We have recently turned on ftrace-dump-on-oops for i915's CI and an issue we have encountered is that the trace buffer size greatly exceeds the pstore capabilities; we get the tail of the oops but not the introduction. Currently the global buffer size is controllable on the cmdline, but at the request of our CI sysadmin, we would like to add a control to the Kconfig as well. The rationale being the cmdline carries the temporary hacks that we want to eradicate, and we want to track the permanent configuration in .config. I have kept the Kconfig option hidden from the user as the default should suffice for the majority of users; reserving the configuration for those that eschew the cmdline option. v2: Add an expert prompt to stop the default value overriding .config changes. Signed-off-by: Chris Wilson Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Tomi Sarvela Cc: Joonas Lahtinen Cc: Daniel Vetter Signed-off-by: Rodrigo Vivi --- kernel/trace/Kconfig | 7 +++++++ kernel/trace/trace.c | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 420ff4bc67fd..a2e963985a29 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -121,6 +121,13 @@ config TRACING select EVENT_TRACING select TRACE_CLOCK +config GLOBAL_TRACE_BUF_SIZE + int + prompt "Global ftrace buffer size (for trace_printk)" if EXPERT + range 0 4194034 + default 1441792 # 16384 * 88 (sizeof(struct print_entry)) + depends on TRACING + config GENERIC_TRACER bool select TRACING diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f9139dc1262c..284ccc24416f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -797,9 +797,7 @@ int tracing_is_enabled(void) * to not have to wait for all that output. Anyway this can be * boot time and run time configurable. */ -#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ - -static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; +static unsigned long trace_buf_size = CONFIG_GLOBAL_TRACE_BUF_SIZE; /* trace_types holds a link list of available tracers. */ static struct tracer *trace_types __read_mostly; -- cgit v1.2.3 From 1addcbbd225f3c7408850479f47a49bc96de275b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Sep 2018 14:17:45 +0100 Subject: kernel/panic: Show the stacktrace after additional notifier messages Most systems keep the last messages from the panic, and we value the stacktrace most, so dump it last in order to preserve it for post-mortems. Signed-off-by: Chris Wilson Acked-by: Martin Peres Link: https://patchwork.freedesktop.org/patch/msgid/20180903131745.30593-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- kernel/panic.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index cefd7d82366f..494e8e3dfae0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -224,13 +224,6 @@ void panic(const char *fmt, ...) buf[len - 1] = '\0'; pr_emerg("Kernel panic - not syncing: %s\n", buf); -#ifdef CONFIG_DEBUG_BUGVERBOSE - /* - * Avoid nested stack-dumping if a panic occurs during oops processing - */ - if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) - dump_stack(); -#endif /* * If kgdb is enabled, give it a chance to run before we stop all @@ -271,6 +264,14 @@ void panic(const char *fmt, ...) */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); +#ifdef CONFIG_DEBUG_BUGVERBOSE + /* + * Avoid nested stack-dumping if a panic occurs during oops processing + */ + if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) + dump_stack(); +#endif + kmsg_dump(KMSG_DUMP_PANIC); /* -- cgit v1.2.3 From ea71c552d6915f498cd1dbec2d39f5a8e7ca7c3e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Oct 2018 12:35:21 +0100 Subject: x86: Downgrade clock throttling thermal event critical error Under CI testing, it is common for the cpus to overheat with the continuous workloads and end up being throttled. As the cpus still function, it is less of a critical error meriting urgent action, but an expected yet significant condition (pr_note). Signed-off-by: Chris Wilson Cc: Petri Latvala Signed-off-by: Rodrigo Vivi --- drivers/thermal/intel/therm_throt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index dab7e8fb1059..11b06112b745 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -284,10 +284,10 @@ static void __maybe_unused throttle_active_work(struct work_struct *work) avg /= ARRAY_SIZE(state->temp_samples); if (state->average > avg) { - pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", - this_cpu, - state->level == CORE_LEVEL ? "Core" : "Package", - state->count); + pr_notice("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", + this_cpu, + state->level == CORE_LEVEL ? "Core" : "Package", + state->count); state->rate_control_active = true; } -- cgit v1.2.3 From 2c2dfe8da9505a178fd1839d4f7dd749de626539 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 16 Nov 2021 09:22:48 +0100 Subject: libata: Downgrade unsupported feature warnings to notifications Signed-off-by: Chris Wilson Cc: Petri Latvala [danvet: Rebase] Signed-off-by: Rodrigo Vivi --- drivers/ata/libata-core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8a0ccb190d76..ebda41c7925f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2062,7 +2062,7 @@ static bool ata_identify_page_supported(struct ata_device *dev, u8 page) * for drives which implement this ATA level or above. */ if (ata_id_major_version(dev->id) >= 10) - ata_dev_warn(dev, + ata_dev_notice(dev, "ATA Identify Device Log not supported\n"); dev->horkage |= ATA_HORKAGE_NO_ID_DEV_LOG; return false; @@ -2134,7 +2134,7 @@ static void ata_dev_config_ncq_send_recv(struct ata_device *dev) unsigned int err_mask; if (!ata_log_supported(dev, ATA_LOG_NCQ_SEND_RECV)) { - ata_dev_warn(dev, "NCQ Send/Recv Log not supported\n"); + ata_dev_notice(dev, "NCQ Send/Recv Log not supported\n"); return; } err_mask = ata_read_log_page(dev, ATA_LOG_NCQ_SEND_RECV, @@ -2159,8 +2159,8 @@ static void ata_dev_config_ncq_non_data(struct ata_device *dev) unsigned int err_mask; if (!ata_log_supported(dev, ATA_LOG_NCQ_NON_DATA)) { - ata_dev_warn(dev, - "NCQ Send/Recv Log not supported\n"); + ata_dev_notice(dev, + "NCQ Send/Recv Log not supported\n"); return; } err_mask = ata_read_log_page(dev, ATA_LOG_NCQ_NON_DATA, @@ -2656,14 +2656,14 @@ int ata_dev_configure(struct ata_device *dev) if (ata_id_is_cfa(id)) { /* CPRM may make this media unusable */ if (id[ATA_ID_CFA_KEY_MGMT] & 1) - ata_dev_warn(dev, + ata_dev_notice(dev, "supports DRM functions and may not be fully accessible\n"); snprintf(revbuf, 7, "CFA"); } else { snprintf(revbuf, 7, "ATA-%d", ata_id_major_version(id)); /* Warn the user if the device has TPM extensions */ if (ata_id_has_tpm(id)) - ata_dev_warn(dev, + ata_dev_notice(dev, "supports DRM functions and may not be fully accessible\n"); } @@ -2817,8 +2817,8 @@ int ata_dev_configure(struct ata_device *dev) } if ((dev->horkage & ATA_HORKAGE_FIRMWARE_WARN) && print_info) { - ata_dev_warn(dev, "WARNING: device requires firmware update to be fully functional\n"); - ata_dev_warn(dev, " contact the vendor or visit http://ata.wiki.kernel.org\n"); + ata_dev_notice(dev, "WARNING: device requires firmware update to be fully functional\n"); + ata_dev_notice(dev, " contact the vendor or visit http://ata.wiki.kernel.org\n"); } return 0; -- cgit v1.2.3 From 5179ec408bcdb7d305f4f7665a95bf108141eeab Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 May 2017 12:45:25 +0100 Subject: perf/core: Avoid removing shared pmu_context on unregister In commit 1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu"), the search for another user of the pmu_cpu_context was removed, and so we unconditionally free it during perf_pmu_unregister. This leads to random corruption later and a BUG at mm/percpu.c:689. v2: Check for shared pmu_contexts under the mutex. Fixes: 1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu") Signed-off-by: Chris Wilson Cc: David Carrillo-Cisneros Cc: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: # v4.11+ Link: http://patchwork.freedesktop.org/patch/msgid/20170512114525.17575-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 523106a506ee..4829e9b397d5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11210,7 +11210,8 @@ void perf_pmu_unregister(struct pmu *pmu) device_del(pmu->dev); put_device(pmu->dev); } - free_pmu_context(pmu); + if (!find_pmu_context(pmu->task_ctx_nr)) + free_pmu_context(pmu); mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); -- cgit v1.2.3 From 04e0abc359fa26e35845b64dea84613d3c378e23 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 2 Jul 2018 16:57:56 +0300 Subject: ICL HACK: usb/icl: Work around ACPI boottime crash Work around the following boot time crash: [ 10.456056] CPU: 1 PID: 220 Comm: systemd-udevd Tainted: G W 4.17.0-rc7-CI-CI_DRM_4040+ #182 [ 10.465828] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP, BIOS +ICLSFWR1.R00.2204.A00.1805172221 05/17/2018 [ 10.479168] RIP: 0010:acpi_ps_complete_this_op+0xa7/0x22a [ 10.484627] RSP: 0018:ffffc900003a7578 EFLAGS: 00010202 [ 10.489881] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8804abeda9c8 RCX: 0000000000000020 [ 10.497045] RDX: 0000000000000000 RSI: ffff88049e604a68 RDI: 0000000000000000 [ 10.504213] RBP: 0000000000000000 R08: ffff8804abeda9c8 R09: 0000000000000000 [ 10.511376] R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000e [ 10.518542] R13: ffff88049e604a68 R14: ffff88049e604a68 R15: ffffffffa00263c2 [ 10.525713] FS: 00007ff6d85f18c0(0000) GS:ffff8804be880000(0000) knlGS:0000000000000000 [ 10.533839] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 10.539616] CR2: 00007ff6d73cff40 CR3: 000000049f794001 CR4: 0000000000760ee0 [ 10.546783] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 10.553949] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 10.561112] PKRU: 55555554 [ 10.563849] Call Trace: [ 10.566323] acpi_ps_complete_op+0x49/0x3f1 [ 10.570537] acpi_ps_parse_loop+0x94c/0x9bb [ 10.574754] ? acpi_ds_delete_walk_state+0x113/0x131 [ 10.579750] acpi_ps_parse_aml+0x1a2/0x4af [ 10.583875] acpi_ps_execute_method+0x1e9/0x2a5 [ 10.588435] acpi_ns_evaluate+0x2e4/0x42c [ 10.592473] acpi_evaluate_object+0x1fd/0x3a8 [ 10.596873] usb_acpi_find_companion+0xee/0x1f0 [usbcore] [ 10.602319] acpi_platform_notify+0x33/0xa0 [ 10.606532] device_add+0x197/0x600 [ 10.610048] ? __init_waitqueue_head+0x36/0x50 [ 10.614529] usb_hub_create_port_device+0x11d/0x340 [usbcore] [ 10.620314] hub_probe+0x9a5/0x1010 [usbcore] [ 10.624701] ? _raw_spin_unlock_irqrestore+0x51/0x60 [ 10.629730] usb_probe_interface+0x13f/0x300 [usbcore] [ 10.634900] driver_probe_device+0x302/0x470 [ 10.639198] ? __driver_attach+0xe0/0xe0 [ 10.643147] bus_for_each_drv+0x59/0x90 [ 10.647013] __device_attach+0xb7/0x130 [ 10.650878] bus_probe_device+0x9c/0xb0 [ 10.654745] device_add+0x3c5/0x600 [ 10.658270] usb_set_configuration+0x540/0x880 [usbcore] [ 10.663621] generic_probe+0x28/0x80 [usbcore] [ 10.668097] driver_probe_device+0x302/0x470 [ 10.672393] ? __driver_attach+0xe0/0xe0 [ 10.676346] bus_for_each_drv+0x59/0x90 [ 10.680211] __device_attach+0xb7/0x130 [ 10.684076] bus_probe_device+0x9c/0xb0 [ 10.687940] device_add+0x3c5/0x600 [ 10.691464] usb_new_device+0x269/0x490 [usbcore] [ 10.696206] usb_add_hcd+0x558/0x850 [usbcore] [ 10.700682] xhci_pci_probe+0x13d/0x240 [xhci_pci] [ 10.705534] pci_device_probe+0xa1/0x130 [ 10.709484] driver_probe_device+0x302/0x470 [ 10.713784] __driver_attach+0xb9/0xe0 [ 10.717562] ? driver_probe_device+0x470/0x470 [ 10.722033] ? driver_probe_device+0x470/0x470 [ 10.726505] bus_for_each_dev+0x64/0x90 [ 10.730370] ? preempt_count_sub+0x92/0xd0 [ 10.734495] bus_add_driver+0x164/0x260 [ 10.738362] ? 0xffffffffa004e000 [ 10.741704] driver_register+0x57/0xc0 [ 10.745482] ? 0xffffffffa004e000 [ 10.748824] do_one_initcall+0x4a/0x350 [ 10.752690] ? do_init_module+0x22/0x20a [ 10.756643] ? rcu_read_lock_sched_held+0x74/0x80 [ 10.761377] ? kmem_cache_alloc_trace+0x284/0x2e0 [ 10.766114] do_init_module+0x5b/0x20a [ 10.769895] load_module+0x250d/0x2b20 [ 10.773678] ? kernel_read+0x2c/0x40 [ 10.777285] ? __se_sys_finit_module+0xaa/0xc0 [ 10.781759] __se_sys_finit_module+0xaa/0xc0 [ 10.786061] do_syscall_64+0x54/0x190 [ 10.789752] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 10.794831] RIP: 0033:0x7ff6d74664d9 [ 10.798430] RSP: 002b:00007ffd91e7dd78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 10.806033] RAX: ffffffffffffffda RBX: 0000560519bfae20 RCX: 00007ff6d74664d9 [ 10.813195] RDX: 0000000000000000 RSI: 00007ff6d795ce23 RDI: 000000000000000e [ 10.820360] RBP: 00007ff6d795ce23 R08: 0000000000000000 R09: 0000000000000000 [ 10.827523] R10: 000000000000000e R11: 0000000000000246 R12: 0000000000000000 [ 10.834690] R13: 0000560519bf9a30 R14: 0000000000020000 R15: 000000000aba9500 [ 10.841862] Code: c2 10 5f ea 81 48 c7 c6 f0 5e ea 81 bf 7c 00 00 00 e8 0d 7c 00 00 31 ed e9 88 01 00 00 48 8b 03 31 ed 48 85 c0 +0f 84 e9 00 00 00 <4c> 8b 60 28 4d 85 e4 0f 84 dc 00 00 00 0f b7 78 0a e8 62 fe ff [ 10.860832] RIP: acpi_ps_complete_this_op+0xa7/0x22a RSP: ffffc900003a7578 [ 10.867907] ---[ end trace 3a0d2ee1129bc71e ]--- Cc: Chris Wilson Signed-off-by: Imre Deak Tested-by: Tomi Sarvela Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180702135756.12159-1-imre.deak@intel.com Signed-off-by: Rodrigo Vivi --- drivers/usb/core/usb-acpi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index 50b2fc7fcc0e..f511b8bf23ca 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "hub.h" @@ -81,6 +82,20 @@ int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable) } EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); +static const struct dmi_system_id intel_icl_broken_acpi[] = { + { + .ident = "ICL RVP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Ice Lake Client Platform"), + }, + }, + + { } +}; + +static bool acpi_connection_type_broken; + static enum usb_port_connect_type usb_acpi_get_connect_type(acpi_handle handle, struct acpi_pld_info *pld) { @@ -89,6 +104,10 @@ static enum usb_port_connect_type usb_acpi_get_connect_type(acpi_handle handle, union acpi_object *upc = NULL; acpi_status status; + /* Work around unknown ACPI instruction error on ICL RVP BIOSes. */ + if (acpi_connection_type_broken) + return USB_PORT_CONNECT_TYPE_UNKNOWN; + /* * According to 9.14 in ACPI Spec 6.2. _PLD indicates whether usb port * is user visible and _UPC indicates whether it is connectable. If @@ -273,6 +292,11 @@ static struct acpi_bus_type usb_acpi_bus = { int usb_acpi_register(void) { + if (dmi_check_system(intel_icl_broken_acpi)) { + pr_info("USB ACPI connection type broken.\n"); + acpi_connection_type_broken = true; + } + return register_acpi_bus_type(&usb_acpi_bus); } -- cgit v1.2.3 From d54085411bf1c8960fa88d511a2bbd3e462a2bd3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 May 2019 22:46:48 +0200 Subject: RFC: hung_task: taint kernel There's the hung_task_panic sysctl, but that's a bit an extreme measure. As a fallback taint at least the machine. Our CI uses this to decide when a reboot is necessary, plus to figure out whether the kernel is still happy. v2: Works much better when I put the else { add_taint() } at the right place. Signed-off-by: Daniel Vetter Cc: Andrew Morton Cc: Tetsuo Handa Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Valdis Kletnieks Cc: Daniel Vetter Cc: Vitaly Kuznetsov Cc: "Liu, Chuansheng" Acked-by: Chris Wilson (for core-for-CI) Link: https://patchwork.freedesktop.org/patch/msgid/20190502204648.5537-1-daniel.vetter@ffwll.ch Signed-off-by: Jani Nikula Signed-off-by: Rodrigo Vivi --- kernel/hung_task.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 9888e2bc8c76..df979c5043e0 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -118,6 +118,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) console_verbose(); hung_task_show_lock = true; hung_task_call_panic = true; + } else { + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } /* -- cgit v1.2.3 From f642a7785e81134e95b45b50781639f8eb09a453 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 May 2019 21:42:08 +0200 Subject: RFC: soft/hardlookup: taint kernel There's the soft/hardlookup_panic sysctls, but that's a bit an extreme measure. As a fallback taint at least the machine. Our CI uses this to decide when a reboot is necessary, plus to figure out whether the kernel is still happy. Signed-off-by: Daniel Vetter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Valdis Kletnieks Cc: Laurence Oberman Cc: Vincent Whitchurch Cc: Don Zickus Cc: Andrew Morton Cc: Sergey Senozhatsky Cc: Sinan Kaya Cc: Daniel Vetter Acked-by: Chris Wilson (for core-for-CI) Link: https://patchwork.freedesktop.org/patch/msgid/20190502194208.3535-2-daniel.vetter@ffwll.ch Signed-off-by: Jani Nikula Signed-off-by: Rodrigo Vivi --- kernel/watchdog.c | 2 ++ kernel/watchdog_hld.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index ad912511a0c0..2e7a449ef9f7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -442,6 +442,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); + else + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } return HRTIMER_RESTART; diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b1582c..cce46cf75d76 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -154,6 +154,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + else + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); __this_cpu_write(hard_watchdog_warn, true); return; -- cgit v1.2.3 From e08d510c0fdaa2dfae6f4a43e7eb368959e20c27 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 16 Nov 2021 09:27:50 +0100 Subject: sched: Mark "RT throttling activated" as KERN_NOTICE Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2781 [danvet: Rebase] Signed-off-by: Rodrigo Vivi --- kernel/sched/deadline.c | 2 +- kernel/sched/rt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d2c072b0ef01..db884d20ac2b 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -800,7 +800,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) * entity. */ if (dl_time_before(dl_se->deadline, rq_clock(rq))) { - printk_deferred_once("sched: DL replenish lagged too much\n"); + printk_deferred_once(KERN_NOTICE "sched: DL replenish lagged too much\n"); dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; dl_se->runtime = pi_of(dl_se)->dl_runtime; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b48baaba2fc2..b04c19579f82 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -977,7 +977,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) */ if (likely(rt_b->rt_runtime)) { rt_rq->rt_throttled = 1; - printk_deferred_once("sched: RT throttling activated\n"); + printk_deferred_once(KERN_NOTICE "sched: RT throttling activated\n"); } else { /* * In case we did anyway, make it go away, -- cgit v1.2.3 From 25d97979aa03d4540cf865dde41b5d93b64d3cf8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 18 Jul 2017 10:21:10 +0200 Subject: net/sch_generic: Shut up noise We can't allow spam in CI. Update 26th June 2018: This is still an issue: Update 23rd May 2019: You guessed it, still ocurring. [ 224.739686] ------------[ cut here ]------------ [ 224.739712] WARNING: CPU: 3 PID: 2982 at net/sched/sch_generic.c:461 dev_watchdog+0x1fd/0x210 [ 224.739714] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_pcm i915 asix usbnet mii mei_me mei prime_numbers i2c_hid pinctrl_sunrisepoint pinctrl_intel btusb btrtl btbcm btintel bluetooth ecdh_generic [ 224.739775] CPU: 3 PID: 2982 Comm: gem_exec_suspen Tainted: G U W 4.18.0-rc2-CI-Patchwork_9414+ #1 [ 224.739777] Hardware name: Dell Inc. XPS 13 9350/, BIOS 1.4.12 11/30/2016 [ 224.739780] RIP: 0010:dev_watchdog+0x1fd/0x210 [ 224.739781] Code: 49 63 4c 24 f0 eb 92 4c 89 ef c6 05 21 46 ad 00 01 e8 77 ee fc ff 89 d9 48 89 c2 4c 89 ee 48 c7 c7 88 4c 14 82 e8 a3 fe 84 ff <0f> 0b eb be 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 c7 47 [ 224.739866] RSP: 0018:ffff88027dd83e40 EFLAGS: 00010286 [ 224.739869] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000102 [ 224.739871] RDX: 0000000080000102 RSI: ffffffff820c8c6c RDI: 00000000ffffffff [ 224.739873] RBP: ffff8802644c1540 R08: 0000000071be9b33 R09: 0000000000000000 [ 224.739874] R10: ffff88027dd83dc0 R11: 0000000000000000 R12: ffff8802644c1588 [ 224.739876] R13: ffff8802644c1160 R14: 0000000000000001 R15: ffff88026a5dc728 [ 224.739878] FS: 00007f18f4887980(0000) GS:ffff88027dd80000(0000) knlGS:0000000000000000 [ 224.739880] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 224.739881] CR2: 00007f4c627ae548 CR3: 000000022ca1a002 CR4: 00000000003606e0 [ 224.739883] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 224.739885] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 224.739886] Call Trace: [ 224.739888] [ 224.739892] ? qdisc_reset+0xe0/0xe0 [ 224.739894] ? qdisc_reset+0xe0/0xe0 [ 224.739897] call_timer_fn+0x93/0x360 [ 224.739903] expire_timers+0xc1/0x1d0 [ 224.739908] run_timer_softirq+0xc7/0x170 [ 224.739916] __do_softirq+0xd9/0x505 [ 224.739923] irq_exit+0xa9/0xc0 [ 224.739926] smp_apic_timer_interrupt+0x9c/0x2d0 [ 224.739929] apic_timer_interrupt+0xf/0x20 [ 224.739931] [ 224.739934] RIP: 0010:delay_tsc+0x2e/0xb0 [ 224.739936] Code: 49 89 fc 55 53 bf 01 00 00 00 e8 6d 2c 78 ff e8 88 9d b6 ff 41 89 c5 0f ae e8 0f 31 48 c1 e2 20 48 09 c2 48 89 d5 eb 16 f3 90 01 00 00 00 e8 48 2c 78 ff e8 63 9d b6 ff 44 39 e8 75 36 0f ae [ 224.740021] RSP: 0018:ffffc900002f7d48 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 [ 224.740024] RAX: 0000000080000000 RBX: 0000000649565ca9 RCX: 0000000000000001 [ 224.740026] RDX: 0000000080000001 RSI: ffffffff820c8c6c RDI: 00000000ffffffff [ 224.740027] RBP: 00000006493ea9ce R08: 000000005e81e2ee R09: 0000000000000000 [ 224.740029] R10: 0000000000000120 R11: 0000000000000000 R12: 00000000002ad8d6 [ 224.740030] R13: 0000000000000003 R14: 0000000000000004 R15: ffff88025caf5408 [ 224.740040] ? delay_tsc+0x66/0xb0 [ 224.740045] hibernation_debug_sleep+0x1c/0x30 [ 224.740048] hibernation_snapshot+0x2c1/0x690 [ 224.740053] hibernate+0x142/0x2a4 [ 224.740057] state_store+0xd0/0xe0 [ 224.740063] kernfs_fop_write+0x104/0x190 [ 224.740068] __vfs_write+0x31/0x180 [ 224.740072] ? rcu_read_lock_sched_held+0x6f/0x80 [ 224.740075] ? rcu_sync_lockdep_assert+0x29/0x50 [ 224.740078] ? __sb_start_write+0x152/0x1f0 [ 224.740080] ? __sb_start_write+0x168/0x1f0 [ 224.740084] vfs_write+0xbd/0x1a0 [ 224.740088] ksys_write+0x50/0xc0 [ 224.740094] do_syscall_64+0x55/0x190 [ 224.740097] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 224.740099] RIP: 0033:0x7f18f400a281 [ 224.740100] Code: c3 0f 1f 84 00 00 00 00 00 48 8b 05 59 8d 20 00 c3 0f 1f 84 00 00 00 00 00 8b 05 8a d1 20 00 85 c0 75 16 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53 [ 224.740186] RSP: 002b:00007fffd1f4fec8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 224.740189] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f18f400a281 [ 224.740190] RDX: 0000000000000004 RSI: 00007f18f448069a RDI: 0000000000000006 [ 224.740192] RBP: 00007fffd1f4fef0 R08: 0000000000000000 R09: 0000000000000000 [ 224.740194] R10: 0000000000000000 R11: 0000000000000246 R12: 000055e795d03400 [ 224.740195] R13: 00007fffd1f50500 R14: 0000000000000000 R15: 0000000000000000 [ 224.740205] irq event stamp: 1582591 [ 224.740207] hardirqs last enabled at (1582590): [] vprintk_emit+0x4bc/0x4d0 [ 224.740210] hardirqs last disabled at (1582591): [] error_entry+0x7c/0x100 [ 224.740212] softirqs last enabled at (1582568): [] __do_softirq+0x34f/0x505 [ 224.740215] softirqs last disabled at (1582571): [] irq_exit+0xa9/0xc0 [ 224.740218] WARNING: CPU: 3 PID: 2982 at net/sched/sch_generic.c:461 dev_watchdog+0x1fd/0x210 [ 224.740219] ---[ end trace 6e41d690e611c338 ]--- References: https://bugzilla.kernel.org/show_bug.cgi?id=196399 Acked-by: Martin Peres Cc: Martin Peres Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170718082110.12524-1-daniel.vetter@ffwll.ch Signed-off-by: Rodrigo Vivi --- net/sched/sch_generic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3b0f62095803..e10273f68c6c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -472,7 +472,12 @@ static void dev_watchdog(struct timer_list *t) } } - if (some_queue_timedout) { + /* The noise is pissing off our CI and upstream doesn't + * move on the bug report: + * + * https://bugzilla.kernel.org/show_bug.cgi?id=196399 + */ + if (some_queue_timedout && 0) { trace_net_dev_xmit_timeout(dev, i); WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", dev->name, netdev_drivername(dev), i); -- cgit v1.2.3 From f994e691de8a5ef0c42fc6065a69bee0e2acc21a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jul 2019 15:29:03 +0100 Subject: mm: Show slab debug as offsets from section base not hashed pointers Since the kernel now used hashed pointers for raw addresses, it is very hard to guage the relative placement within a section, and since the hash value will never match up with any contents, using it provides no information relevant for slab debugging. Show the relative offset into each section, so that some reference for the hexdump is provided. Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index f7368bfffb7a..1cb98e8a7c29 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -685,7 +685,7 @@ static void print_section(char *level, char *text, u8 *addr, unsigned int length) { metadata_access_enable(); - print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, + print_hex_dump(level, text, DUMP_PREFIX_OFFSET, 16, 1, kasan_reset_tag((void *)addr), length, 1); metadata_access_disable(); } -- cgit v1.2.3 From a42131284b490f24b3c04f03d8272924d515cf71 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 27 Jul 2019 13:17:50 +0100 Subject: uapi/perf: Squelch compiler warning Remove copious amounts of ./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0) as they are drowning out our warnings. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190727121750.20882-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- include/uapi/linux/perf_event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index bd8860eeb291..45f20a518db0 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -164,11 +164,12 @@ enum perf_event_sample_format { PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ - - __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + +#define __PERF_SAMPLE_CALLCHAIN_EARLY (1ULL << 63) /* non-ABI; internal use */ + /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * -- cgit v1.2.3 From af336ab936e60be7c4dff4e7bf803108cb420f27 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Sep 2019 15:23:39 +0100 Subject: drm/i915: Force compilation with intel-iommu for CI validation Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190910142339.17072-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/Kconfig.debug | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 47e845353ffa..f27c0b5873f7 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -21,6 +21,14 @@ config DRM_I915_DEBUG depends on DRM_I915 depends on EXPERT # only for developers depends on !COMPILE_TEST # never built by robots + select PCI_MSI # ... for iommu enabled by default + select IOMMU_API + select IOMMU_IOVA + select IOMMU_SUPPORT + select NEED_DMA_MAP_STATE + select DMAR_TABLE + select INTEL_IOMMU + select INTEL_IOMMU_DEFAULT_ON select DEBUG_FS select PREEMPT_COUNT select I2C_CHARDEV -- cgit v1.2.3 From c85f476858781050bb86a7bebe57d1bedac73f73 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Sep 2019 14:44:17 +0100 Subject: HAX iommu/intel: Ignore igfx_off CI currently applies intel_iommu=igfx_off on the commandline and we wish to ignore that until it is removed. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190910134417.14085-3-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- drivers/iommu/intel/iommu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 0bde0c8b4126..34e2fe0bfc8f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -424,8 +424,6 @@ static int __init intel_iommu_setup(char *str) no_platform_optin = 1; pr_info("IOMMU disabled\n"); } else if (!strncmp(str, "igfx_off", 8)) { - dmar_map_gfx = 0; - pr_info("Disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); iommu_dma_forcedac = true; -- cgit v1.2.3 From b3acf17eafd473e6e8f8f7c5ec7040efaaff22b8 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 6 Nov 2019 12:01:55 +0200 Subject: Revert "drm/i915: Don't select BROKEN" This reverts commit cea35f5ad5ffac06ea29e0d7a7f748683e1f1b7d. References: https://lists.freedesktop.org/archives/intel-gfx/2019-November/218878.html Suggested-by: Daniel Vetter Signed-off-by: Joonas Lahtinen Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index f27c0b5873f7..e7fd3e76f8a2 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -48,6 +48,7 @@ config DRM_I915_DEBUG select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST + select BROKEN # for prototype uAPI default n help Choose this option to turn on extra driver debugging that may affect -- cgit v1.2.3 From 9371e8d44608e1cec2096705445ed9699846a168 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Apr 2020 12:46:25 +0100 Subject: HAX timer: Describe the delayed_work for a freed timer Improve upon the <3> [310.437368] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x10 by describing what delayed_work was queued instead. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200409114625.12251-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- kernel/time/timer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index e3d2c23c413d..4bec92f6c2ab 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -617,7 +617,14 @@ static const struct debug_obj_descr timer_debug_descr; static void *timer_debug_hint(void *addr) { - return ((struct timer_list *) addr)->function; + struct timer_list *timer = addr; + + if (timer->function == delayed_work_timer_fn) { + struct delayed_work *work = from_timer(work, timer, timer); + return work->work.func; + } + + return timer->function; } static bool timer_is_static_object(void *addr) -- cgit v1.2.3 From b1498ab9f95cd7e60e6a5f90be337d9bdcd352a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Apr 2020 09:27:53 +0100 Subject: pci/msi: Stop warning for MSI enabling failure If the MSI is already enabled, trying to enable it again results in an -EINVAL and on the first attempt a WARN. That WARN causes our CI to abort the run [on each first attempt to suspend]: <4> [463.142025] WARNING: CPU: 0 PID: 2225 at drivers/pci/msi.c:1074 __pci_enable_msi_range+0x3cb/0x420 <4> [463.142026] Modules linked in: snd_hda_intel i915 snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic mei_hdcp x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_intel_dspcfg ghash_clmulni_intel snd_hda_codec btusb btrtl btbcm btintel e1000e bluetooth snd_hwdep snd_hda_core ptp ecdh_generic snd_pcm ecc pps_core mei_me mei prime_numbers [last unloaded: i915] <4> [463.142045] CPU: 0 PID: 2225 Comm: kworker/u8:14 Tainted: G U 5.7.0-rc2-CI-CI_DRM_8350+ #1 <4> [463.142046] Hardware name: Intel Corporation NUC7i5BNH/NUC7i5BNB, BIOS BNKBL357.86A.0060.2017.1214.2013 12/14/2017 <4> [463.142049] Workqueue: events_unbound async_run_entry_fn <4> [463.142051] RIP: 0010:__pci_enable_msi_range+0x3cb/0x420 <4> [463.142053] Code: 76 58 49 8d 56 48 48 89 df e8 31 73 fd ff e9 20 fe ff ff 31 f6 48 89 df e8 c2 e9 fd ff e9 d6 fe ff ff 45 89 fc e9 1a ff ff ff <0f> 0b 41 bc ea ff ff ff e9 0d ff ff ff 41 bc ea ff ff ff e9 02 ff <4> [463.142054] RSP: 0018:ffffc90000593cd0 EFLAGS: 00010202 <4> [463.142056] RAX: 0000000000000010 RBX: ffff888274051000 RCX: 0000000000000000 <4> [463.142057] RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff888274051000 <4> [463.142058] RBP: ffff888238aa1018 R08: 0000000000000001 R09: 0000000000000001 <4> [463.142060] R10: ffffc90000593d90 R11: 00000000c79cdfd5 R12: ffff8882740510b0 <4> [463.142061] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000001 <4> [463.142062] FS: 0000000000000000(0000) GS:ffff888276c00000(0000) knlGS:0000000000000000 <4> [463.142064] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [463.142065] CR2: 000055706f347d80 CR3: 0000000005610003 CR4: 00000000003606f0 <4> [463.142066] Call Trace: <4> [463.142073] pci_enable_msi+0x11/0x20 <4> [463.142077] azx_resume+0x1ab/0x200 [snd_hda_intel] <4> [463.142080] ? pci_pm_thaw+0x80/0x80 <4> [463.142084] dpm_run_callback+0x64/0x280 <4> [463.142089] device_resume+0xd4/0x1c0 <4> [463.142093] ? dpm_watchdog_set+0x60/0 While this would appear to be a bug in snd-hda, it does appear inconsequential, at least for gfx-ci. Downgrade the warning to an info, like the other already-enabled error for MSI-X. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1687 Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200423082753.3899018-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- drivers/pci/msi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 48e3f4e47b29..94ce0266fe30 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1010,8 +1010,10 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (maxvec < minvec) return -ERANGE; - if (WARN_ON_ONCE(dev->msi_enabled)) + if (dev->msi_enabled) { + pci_info(dev, "can't enable MSI, already enabled\n"); return -EINVAL; + } nvec = pci_msi_vec_count(dev); if (nvec < 0) -- cgit v1.2.3 From 63215adf1669be1c2b6bb92bd93fec61216b62fc Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Sat, 2 May 2020 18:14:13 +0100 Subject: perf/core: Only copy-to-user after completely unlocking all locks, v3. We inadvertently create a dependency on mmap_sem with a whole chain. This breaks any user who wants to take a lock and call rcu_barrier(), while also taking that lock inside mmap_sem: <4> [604.892532] ====================================================== <4> [604.892534] WARNING: possible circular locking dependency detected <4> [604.892536] 5.6.0-rc7-CI-Patchwork_17096+ #1 Tainted: G U <4> [604.892537] ------------------------------------------------------ <4> [604.892538] kms_frontbuffer/2595 is trying to acquire lock: <4> [604.892540] ffffffff8264a558 (rcu_state.barrier_mutex){+.+.}, at: rcu_barrier+0x23/0x190 <4> [604.892547] but task is already holding lock: <4> [604.892547] ffff888484716050 (reservation_ww_class_mutex){+.+.}, at: i915_gem_object_pin_to_display_plane+0x89/0x270 [i915] <4> [604.892592] which lock already depends on the new lock. <4> [604.892593] the existing dependency chain (in reverse order) is: <4> [604.892594] -> #6 (reservation_ww_class_mutex){+.+.}: <4> [604.892597] __ww_mutex_lock.constprop.15+0xc3/0x1090 <4> [604.892598] ww_mutex_lock+0x39/0x70 <4> [604.892600] dma_resv_lockdep+0x10e/0x1f5 <4> [604.892602] do_one_initcall+0x58/0x300 <4> [604.892604] kernel_init_freeable+0x17b/0x1dc <4> [604.892605] kernel_init+0x5/0x100 <4> [604.892606] ret_from_fork+0x24/0x50 <4> [604.892607] -> #5 (reservation_ww_class_acquire){+.+.}: <4> [604.892609] dma_resv_lockdep+0xec/0x1f5 <4> [604.892610] do_one_initcall+0x58/0x300 <4> [604.892610] kernel_init_freeable+0x17b/0x1dc <4> [604.892611] kernel_init+0x5/0x100 <4> [604.892612] ret_from_fork+0x24/0x50 <4> [604.892613] -> #4 (&mm->mmap_sem#2){++++}: <4> [604.892615] __might_fault+0x63/0x90 <4> [604.892617] _copy_to_user+0x1e/0x80 <4> [604.892619] perf_read+0x200/0x2b0 <4> [604.892621] vfs_read+0x96/0x160 <4> [604.892622] ksys_read+0x9f/0xe0 <4> [604.892623] do_syscall_64+0x4f/0x220 <4> [604.892624] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4> [604.892625] -> #3 (&cpuctx_mutex){+.+.}: <4> [604.892626] __mutex_lock+0x9a/0x9c0 <4> [604.892627] perf_event_init_cpu+0xa4/0x140 <4> [604.892629] perf_event_init+0x19d/0x1cd <4> [604.892630] start_kernel+0x362/0x4e4 <4> [604.892631] secondary_startup_64+0xa4/0xb0 <4> [604.892631] -> #2 (pmus_lock){+.+.}: <4> [604.892633] __mutex_lock+0x9a/0x9c0 <4> [604.892633] perf_event_init_cpu+0x6b/0x140 <4> [604.892635] cpuhp_invoke_callback+0x9b/0x9d0 <4> [604.892636] _cpu_up+0xa2/0x140 <4> [604.892637] do_cpu_up+0x61/0xa0 <4> [604.892639] smp_init+0x57/0x96 <4> [604.892639] kernel_init_freeable+0x87/0x1dc <4> [604.892640] kernel_init+0x5/0x100 <4> [604.892642] ret_from_fork+0x24/0x50 <4> [604.892642] -> #1 (cpu_hotplug_lock.rw_sem){++++}: <4> [604.892643] cpus_read_lock+0x34/0xd0 <4> [604.892644] rcu_barrier+0xaa/0x190 <4> [604.892645] kernel_init+0x21/0x100 <4> [604.892647] ret_from_fork+0x24/0x50 <4> [604.892647] -> #0 (rcu_state.barrier_mutex){+.+.}: <4> [604.892649] __lock_acquire+0x1328/0x15d0 <4> [604.892650] lock_acquire+0xa7/0x1c0 <4> [604.892651] __mutex_lock+0x9a/0x9c0 <4> [604.892652] rcu_barrier+0x23/0x190 <4> [604.892680] i915_gem_object_unbind+0x29d/0x3f0 [i915] <4> [604.892707] i915_gem_object_pin_to_display_plane+0x141/0x270 [i915] <4> [604.892737] intel_pin_and_fence_fb_obj+0xec/0x1f0 [i915] <4> [604.892767] intel_plane_pin_fb+0x3f/0xd0 [i915] <4> [604.892797] intel_prepare_plane_fb+0x13b/0x5c0 [i915] <4> [604.892798] drm_atomic_helper_prepare_planes+0x85/0x110 <4> [604.892827] intel_atomic_commit+0xda/0x390 [i915] <4> [604.892828] drm_atomic_helper_set_config+0x57/0xa0 <4> [604.892830] drm_mode_setcrtc+0x1c4/0x720 <4> [604.892830] drm_ioctl_kernel+0xb0/0xf0 <4> [604.892831] drm_ioctl+0x2e1/0x390 <4> [604.892833] ksys_ioctl+0x7b/0x90 <4> [604.892835] __x64_sys_ioctl+0x11/0x20 <4> [604.892835] do_syscall_64+0x4f/0x220 <4> [604.892836] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4> [604.892837] Changes since v1: - Use (*values)[n++] in perf_read_one(). Changes since v2: - Centrally allocate values. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20200502171413.9133-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi --- kernel/events/core.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4829e9b397d5..5e27cb0be017 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5292,20 +5292,16 @@ static int __perf_read_group_add(struct perf_event *leader, } static int perf_read_group(struct perf_event *event, - u64 read_format, char __user *buf) + u64 read_format, char __user *buf, + u64 *values) { struct perf_event *leader = event->group_leader, *child; struct perf_event_context *ctx = leader->ctx; int ret; - u64 *values; lockdep_assert_held(&ctx->mutex); - values = kzalloc(event->read_size, GFP_KERNEL); - if (!values) - return -ENOMEM; - - values[0] = 1 + leader->nr_siblings; + *values = 1 + leader->nr_siblings; /* * By locking the child_mutex of the leader we effectively @@ -5323,25 +5319,17 @@ static int perf_read_group(struct perf_event *event, goto unlock; } - mutex_unlock(&leader->child_mutex); - ret = event->read_size; - if (copy_to_user(buf, values, event->read_size)) - ret = -EFAULT; - goto out; - unlock: mutex_unlock(&leader->child_mutex); -out: - kfree(values); return ret; } static int perf_read_one(struct perf_event *event, - u64 read_format, char __user *buf) + u64 read_format, char __user *buf, + u64 *values) { u64 enabled, running; - u64 values[4]; int n = 0; values[n++] = __perf_event_read_value(event, &enabled, &running); @@ -5352,9 +5340,6 @@ static int perf_read_one(struct perf_event *event, if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); - if (copy_to_user(buf, values, n * sizeof(u64))) - return -EFAULT; - return n * sizeof(u64); } @@ -5375,7 +5360,8 @@ static bool is_event_hup(struct perf_event *event) * Read the performance event - simple non blocking version for now */ static ssize_t -__perf_read(struct perf_event *event, char __user *buf, size_t count) +__perf_read(struct perf_event *event, char __user *buf, + size_t count, u64 *values) { u64 read_format = event->attr.read_format; int ret; @@ -5393,9 +5379,9 @@ __perf_read(struct perf_event *event, char __user *buf, size_t count) WARN_ON_ONCE(event->ctx->parent_ctx); if (read_format & PERF_FORMAT_GROUP) - ret = perf_read_group(event, read_format, buf); + ret = perf_read_group(event, read_format, buf, values); else - ret = perf_read_one(event, read_format, buf); + ret = perf_read_one(event, read_format, buf, values); return ret; } @@ -5405,16 +5391,31 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_event *event = file->private_data; struct perf_event_context *ctx; + u64 stack_values[8]; + u64 *values; int ret; ret = security_perf_event_read(event); if (ret) return ret; + if (event->read_size <= sizeof(stack_values)) + values = memset(stack_values, 0, event->read_size); + else + values = kzalloc(event->read_size, GFP_KERNEL); + if (!values) + return -ENOMEM; + ctx = perf_event_ctx_lock(event); - ret = __perf_read(event, buf, count); + ret = __perf_read(event, buf, count, values); perf_event_ctx_unlock(event, ctx); + if (ret > 0 && copy_to_user(buf, values, ret)) + ret = -EFAULT; + + if (values != stack_values) + kfree(values); + return ret; } -- cgit v1.2.3 From a363d2fd50e9145a3bb69113ccec42115e15d276 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 31 Oct 2020 19:20:41 +0000 Subject: HAX suspend: Disable S3/S4 for fi-bdw-samus Signed-off-by: Rodrigo Vivi --- drivers/acpi/sleep.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index eaa47753b758..03b911e94df2 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -81,11 +81,16 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } +static u8 max_sleep_state = -1; + bool acpi_sleep_state_supported(u8 sleep_state) { acpi_status status; u8 type_a, type_b; + if (sleep_state > max_sleep_state) + return false; + status = acpi_get_sleep_type_data(sleep_state, &type_a, &type_b); return ACPI_SUCCESS(status) && (!acpi_gbl_reduced_hardware || (acpi_gbl_FADT.sleep_control.address @@ -162,6 +167,13 @@ static int __init init_nvs_nosave(const struct dmi_system_id *d) return 0; } +static int __init init_nosleep(const struct dmi_system_id *d) +{ + pr_info("Disabling ACPI suspend\n"); + max_sleep_state = 0; + return 0; +} + bool acpi_sleep_default_s3; static int __init init_default_s3(const struct dmi_system_id *d) @@ -374,6 +386,15 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, + + { + .callback = init_nosleep, + .ident = "samus", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Samus"), + }, + }, {}, }; -- cgit v1.2.3 From f496bc6b79abaf149cd25f19223ba95b628544aa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Nov 2020 14:51:55 +0000 Subject: HAX x86/rapl: Treat Tigerlake like Icelake Since Tigerlake seems to have inherited its cstates and other rapl power caps from Icelake, assume it also follows Icelake for its rapl events. Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi --- arch/x86/events/rapl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 85feafacc445..7c8c6d60be33 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -804,6 +804,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); -- cgit v1.2.3 From 4739347de2b97aa9b045a4721099ad539ed507e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Nov 2020 23:51:06 +0000 Subject: HAX sound: Disable probing snd_hda with DG1 Audio component in i915 is not hooked up yet causing long timeouts and angry abortive CI. Signed-off-by: Rodrigo Vivi --- sound/hda/hdac_i915.c | 23 +++++++++++++++++++++++ sound/pci/hda/hda_intel.c | 2 -- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index 454474ac5716..fbca4bf53a47 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -128,6 +128,26 @@ static bool i915_gfx_present(void) return pci_dev_present(ids); } +static bool dg1_gfx_present(void) +{ + static const struct pci_device_id ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4905), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16 }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4906), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16 }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4907), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16 }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4908), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16 }, + {} + }; + return pci_dev_present(ids); +} + /** * snd_hdac_i915_init - Initialize i915 audio component * @bus: HDA core bus @@ -148,6 +168,9 @@ int snd_hdac_i915_init(struct hdac_bus *bus) if (!i915_gfx_present()) return -ENODEV; + if (dg1_gfx_present()) + return -ENODEV; + err = snd_hdac_acomp_init(bus, NULL, i915_component_master_match, sizeof(struct i915_audio_component) - sizeof(*acomp)); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index fe51163f2d82..448c1518844a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2244,8 +2244,6 @@ static int azx_probe_continue(struct azx *chip) * codecs can be on the same link. */ if (CONTROLLER_IN_GPU(pci)) { - dev_err(chip->card->dev, - "HSW/BDW HD-audio HDMI/DP requires binding with gfx driver\n"); goto out_free; } else { /* don't bother any longer */ -- cgit v1.2.3 From 97c55a524023aad2930a5382c0d1404a040b97be Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Dec 2020 16:47:00 +0000 Subject: HAX net/phy: Suppress WARN for calling stop while halted Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2805 Signed-off-by: Rodrigo Vivi --- drivers/net/phy/phy.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index beb2b66da132..8a91b8cfd94c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1034,11 +1034,8 @@ void phy_stop(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; - if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) { - WARN(1, "called from state %s\n", - phy_state_to_str(phydev->state)); + if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) return; - } mutex_lock(&phydev->lock); -- cgit v1.2.3 From a457e2229c46d6580148d9b0be3740c447ec7783 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Jan 2021 20:40:54 +0000 Subject: HAX net/phy: Suppress WARN from phy_error Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2874 Signed-off-by: Rodrigo Vivi --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8a91b8cfd94c..48977bae20dd 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -939,7 +939,7 @@ void phy_stop_machine(struct phy_device *phydev) */ void phy_error(struct phy_device *phydev) { - WARN_ON(1); + pr_notice_once("%s\n", __func__); mutex_lock(&phydev->lock); phydev->state = PHY_HALTED; -- cgit v1.2.3 From 4c82fd0506b4cc196a375d1200361fb0c5439acd Mon Sep 17 00:00:00 2001 From: Zbigniew Kempczyński Date: Thu, 17 Jun 2021 07:44:18 +0200 Subject: drm/i915: Add relocation exceptions for two other platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For topic/intel-for-CI branch only. We have established previously we stop using relocations starting from gen12 platforms with Tigerlake as an exception. We keep this statement but we want to enable relocations conditionally for Alderlake S+P under require_force_probe flag set. Keeping relocations under require_force_probe flag is interim solution until IGTs will be rewritten to use softpin. v2: - remove inline from function definition (Jani) - fix indentation v3: change to GRAPHICS_VER() (Zbigniew) v4: remove RKL from flag as it is already shipped (Rodrigo) v5: prepare patch to be used within topic/intel-for-CI branch only v6: change comment (Rodrigo) Signed-off-by: Zbigniew Kempczyński Cc: Dave Airlie Cc: Daniel Vetter Cc: Jason Ekstrand Cc: Rodrigo Vivi Acked-by: Dave Airlie Acked-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4d7da07442f2..9c323666bd7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -490,16 +490,27 @@ eb_unreserve_vma(struct eb_vma *ev) ev->flags &= ~__EXEC_OBJECT_RESERVED; } +static bool platform_has_relocs_enabled(const struct i915_execbuffer *eb) +{ + /* + * Relocations are disallowed starting from gen12 with Tigerlake + * as an exception. To unblock CI, we are temporarily allowing it + * for Rocketlake and Alderlake. + */ + if (GRAPHICS_VER(eb->i915) < 12 || IS_TIGERLAKE(eb->i915) || + IS_ROCKETLAKE(eb->i915) || IS_ALDERLAKE_S(eb->i915) || + IS_ALDERLAKE_P(eb->i915)) + return true; + + return false; +} + static int eb_validate_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry, struct i915_vma *vma) { - /* Relocations are disallowed for all platforms after TGL-LP. This - * also covers all platforms with local memory. - */ - if (entry->relocation_count && - GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) + if (entry->relocation_count && !platform_has_relocs_enabled(eb)) return -EINVAL; if (unlikely(entry->flags & eb->invalid_flags)) -- cgit v1.2.3