summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2018-03-02 11:55:36 +0000
committerTvrtko Ursulin <tvrtko.ursulin@intel.com>2018-03-05 08:18:26 +0000
commitec872b7dc1fe68d153aaceaf74d55865719a76da (patch)
tree8de3dae853f78bcc61c6527e6246585a12b6e4ff
parent46cbf410ba15423d5307ba1f3ca840ddff2170c3 (diff)
tests/perf_pmu: Handle CPU hotplug failures better
CPU hotplug, especially CPU0, can be flaky on commodity hardware. To improve test reliability and response times when testing larger runs we need to handle those cases better. Handle failures to off-line a CPU by immediately skipping the test, and failures to on-line a CPU by immediately rebooting the machine. This patch includes igt_sysrq_reboot implementation from Chris Wilson. v2: Halt by default, reboot if env variable IGT_REBOOT_ON_FATAL_ERROR is set. (Petri Latvala) v3: Add missing docs and update stale comment. (Petri Latvala) v4: Use pause instead of sleep. (Chris Wilson) v5: Newlines! (Chris Wilson) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Petri Latvala <petri.latvala@intel.com> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com> Reviewed-by: Petri Latvala <petri.latvala@intel.com>
-rw-r--r--lib/Makefile.sources2
-rw-r--r--lib/igt_core.c23
-rw-r--r--lib/igt_core.h1
-rw-r--r--lib/igt_sysrq.c27
-rw-r--r--lib/igt_sysrq.h30
-rw-r--r--lib/meson.build1
-rw-r--r--tests/perf_pmu.c38
7 files changed, 115 insertions, 7 deletions
diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 5b13ef88..3d37ef1d 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -35,6 +35,8 @@ lib_source_list = \
igt_stats.h \
igt_sysfs.c \
igt_sysfs.h \
+ igt_sysrq.c \
+ igt_sysrq.h \
igt_x86.h \
igt_x86.c \
igt_vgem.c \
diff --git a/lib/igt_core.c b/lib/igt_core.c
index c292343d..e52b806b 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -70,6 +70,7 @@
#include "igt_core.h"
#include "igt_aux.h"
#include "igt_sysfs.h"
+#include "igt_sysrq.h"
#include "igt_rc.h"
#define UNW_LOCAL_ONLY
@@ -1137,6 +1138,28 @@ void igt_fail(int exitcode)
}
/**
+ * igt_fatal_error: Stop test execution on fatal errors
+ *
+ * Stop test execution or optionally, if the IGT_REBOOT_ON_FATAL_ERROR
+ * environment variable is set, reboot the machine.
+ *
+ * Since our test runner (piglit) does not support fatal test exit codes, we
+ * implement the default behaviour by waiting endlessly.
+ */
+void __attribute__((noreturn)) igt_fatal_error(void)
+{
+ if (igt_check_boolean_env_var("IGT_REBOOT_ON_FATAL_ERROR", false)) { /* opt-in reboot path; the false argument is presumably the default when the variable is unset - TODO confirm */
+ igt_warn("FATAL ERROR - REBOOTING\n");
+ igt_sysrq_reboot(); /* declared noreturn in igt_sysrq.h, so control does not come back here */
+ } else {
+ igt_warn("FATAL ERROR\n");
+ for (;;) /* default path: never exit, never return to the caller */
+ pause(); /* pause() can return once a signal handler has run, hence the enclosing loop */
+ }
+}
+
+
+/**
* igt_can_fail:
*
* Returns true if called from either an #igt_fixture, #igt_subtest or a
diff --git a/lib/igt_core.h b/lib/igt_core.h
index 7af2b4c1..66523a20 100644
--- a/lib/igt_core.h
+++ b/lib/igt_core.h
@@ -311,6 +311,7 @@ void __igt_fail_assert(const char *domain, const char *file,
const char *format, ...)
__attribute__((noreturn));
void igt_exit(void) __attribute__((noreturn));
+void igt_fatal_error(void) __attribute__((noreturn));
/**
* igt_ignore_warn:
diff --git a/lib/igt_sysrq.c b/lib/igt_sysrq.c
new file mode 100644
index 00000000..3bda321f
--- /dev/null
+++ b/lib/igt_sysrq.c
@@ -0,0 +1,27 @@
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/reboot.h>
+
+#include "igt_core.h"
+
+#include "igt_sysrq.h"
+
+/**
+ * igt_sysrq_reboot: Reboots the machine
+ *
+ * Syncs filesystems and immediately reboots the machine.
+ */
+void igt_sysrq_reboot(void)
+{
+ sync(); /* flush pending filesystem writes before the machine goes down */
+
+ /* Try to be nice at first, and if that fails pull the trigger */
+ if (reboot(RB_AUTOBOOT)) { /* a non-zero return means the reboot() request was refused */
+ int fd = open("/proc/sysrq-trigger", O_WRONLY); /* NOTE(review): fd is not checked; if open() fails, write() and close() below are handed -1 */
+ igt_ignore_warn(write(fd, "b", 2)); /* length 2 sends 'b' plus the string's NUL terminator; the write result is not inspected */
+ close(fd);
+ }
+
+ abort(); /* reached only if neither reboot attempt stopped the process; upholds the noreturn declaration */
+}
diff --git a/lib/igt_sysrq.h b/lib/igt_sysrq.h
new file mode 100644
index 00000000..422473d2
--- /dev/null
+++ b/lib/igt_sysrq.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IGT_SYSRQ_H__
+#define __IGT_SYSRQ_H__
+
+void igt_sysrq_reboot(void) __attribute__((noreturn));
+
+#endif /* __IGT_SYSRQ_H__ */
diff --git a/lib/meson.build b/lib/meson.build
index a9e53689..b3b8b14a 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -14,6 +14,7 @@ lib_sources = [
'igt_stats.c',
'igt_syncobj.c',
'igt_sysfs.c',
+ 'igt_sysrq.c',
'igt_vgem.c',
'igt_x86.c',
'instdone.c',
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 4713c98c..9ebffc64 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -990,6 +990,7 @@ static void cpu_hotplug(int gem_fd)
int link[2];
int fd, ret;
int cur = 0;
+ char buf;
igt_skip_on(IS_BROXTON(intel_get_drm_devid(gem_fd)));
igt_require(cpu0_hotplug_support());
@@ -1036,9 +1037,32 @@ static void cpu_hotplug(int gem_fd)
}
/* Offline followed by online a CPU. */
- igt_assert_eq(write(cpufd, "0", 2), 2);
+
+ ret = write(cpufd, "0", 2);
+ if (ret < 0) {
+ /*
+ * If we failed to offline a CPU we don't want
+ * to proceed.
+ */
+ igt_warn("Failed to offline cpu%u! (%d)\n",
+ cpu, errno);
+ igt_assert_eq(write(link[1], "s", 1), 1);
+ break;
+ }
+
usleep(1e6);
- igt_assert_eq(write(cpufd, "1", 2), 2);
+
+ ret = write(cpufd, "1", 2);
+ if (ret < 0) {
+ /*
+ * Failed to bring a CPU back online is fatal
+ * for the sanity of a test run so stop further
+ * testing.
+ */
+ igt_warn("Failed to online cpu%u! (%d)\n",
+ cpu, errno);
+ igt_fatal_error();
+ }
close(cpufd);
cpu++;
@@ -1052,15 +1076,12 @@ static void cpu_hotplug(int gem_fd)
* until the CPU core shuffler finishes one loop.
*/
for (;;) {
- char buf;
- int ret2;
-
usleep(500e3);
end_spin(gem_fd, spin[cur], 0);
/* Check if the child is signaling completion. */
- ret2 = read(link[0], &buf, 1);
- if ( ret2 == 1 || (ret2 < 0 && errno != EAGAIN))
+ ret = read(link[0], &buf, 1);
+ if ( ret == 1 || (ret < 0 && errno != EAGAIN))
break;
igt_spin_batch_free(gem_fd, spin[cur]);
@@ -1079,6 +1100,9 @@ static void cpu_hotplug(int gem_fd)
close(fd);
close(link[0]);
+ /* Skip if child signals a problem with offlining a CPU. */
+ igt_skip_on(buf == 's');
+
assert_within_epsilon(val, ts[1] - ts[0], tolerance);
}