summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2016-03-22 11:33:41 +0000
committer Chris Wilson <chris@chris-wilson.co.uk> 2016-03-24 11:25:38 +0000
commit 756f3e0cb7e7d7351e3eb955ca782a438c6aa887 (patch)
tree c1673509c3b101aa70b03408e252e4cb97021308 /lib
parent eb572106b4721dcfb6bf873ae40c5200f5369039 (diff)
lib: Add a GPU error detector
If we listen to the uevents from the kernel, we can detect when the GPU hangs. This requires forking a helper process that listens for those uevents and sends a signal back to the parent when a hang is reported.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'lib')
-rw-r--r-- lib/Makefile.am 12
-rw-r--r-- lib/igt_aux.c 80
-rw-r--r-- lib/igt_aux.h 3
3 files changed, 93 insertions, 2 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a8a1eb6d..d2f2e16b 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -15,12 +15,20 @@ if HAVE_VC4
endif
AM_CPPFLAGS = -I$(top_srcdir)
-AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
+AM_CFLAGS = $(CWARNFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
-DIGT_SRCDIR=\""$(abs_top_srcdir)/tests"\" \
-DIGT_DATADIR=\""$(pkgdatadir)"\" \
-DIGT_LOG_DOMAIN=\""$(subst _,-,$*)"\" \
-pthread
-LDADD = $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
AM_CFLAGS += $(CAIRO_CFLAGS)
+libintel_tools_la_LIBADD = \
+ $(DRM_LIBS) \
+ $(PCIACCESS_LIBS) \
+ $(CAIRO_LIBS) \
+ $(LIBUDEV_LIBS) \
+ $(LIBUNWIND_LIBS) \
+ $(TIMER_LIBS) \
+ -lm
+
diff --git a/lib/igt_aux.c b/lib/igt_aux.c
index b4c301e5..bfeaa168 100644
--- a/lib/igt_aux.c
+++ b/lib/igt_aux.c
@@ -42,6 +42,7 @@
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
+#include <sys/poll.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/types.h>
@@ -359,6 +360,85 @@ void igt_stop_signal_helper(void)
sig_stat = 0;
}
+#if HAVE_UDEV
+#include <libudev.h>
+
+static struct igt_helper_process hang_detector;
+static void __attribute__((noreturn))
+hang_detector_process(pid_t pid, dev_t rdev)
+{
+ struct udev_monitor *mon =
+ udev_monitor_new_from_netlink(udev_new(), "kernel");
+ struct pollfd pfd;
+
+ udev_monitor_filter_add_match_subsystem_devtype(mon, "drm", NULL);
+ udev_monitor_enable_receiving(mon);
+
+ pfd.fd = udev_monitor_get_fd(mon);
+ pfd.events = POLLIN;
+
+ while (poll(&pfd, 1, -1) > 0) {
+ struct udev_device *dev = udev_monitor_receive_device(mon);
+ dev_t devnum;
+
+ if (dev == NULL)
+ break;
+
+ devnum = udev_device_get_devnum(dev);
+ if (memcmp(&rdev, &devnum, sizeof(dev_t)) == 0) {
+ const char *str;
+
+ str = udev_device_get_property_value(dev, "ERROR");
+ if (str && atoi(str) == 1)
+ kill(pid, SIGRTMAX);
+ }
+
+ udev_device_unref(dev);
+ if (kill(pid, 0)) /* Parent has died, so must we. */
+ break;
+ }
+
+ exit(0);
+}
+
+/*
+ * sig_abort - signal handler that fails the running test.
+ * @sig: number of the delivered signal (unused)
+ *
+ * Registered for SIGRTMAX by igt_fork_hang_detector(); the helper process
+ * raises that signal when it sees a GPU error uevent.
+ *
+ * NOTE(review): igt_assert() is invoked from signal context here - confirm
+ * that this is acceptable for the test framework.
+ */
+static void sig_abort(int sig)
+{
+	(void)sig;
+
+	igt_assert(!"GPU hung");
+}
+
+/**
+ * igt_fork_hang_detector:
+ * @fd: open file descriptor of the device to watch
+ *
+ * Installs a SIGRTMAX handler that fails the test and forks a helper
+ * process which watches uevents for the device behind @fd. Does nothing
+ * when subtests are only being listed. Asserts that fstat() on @fd
+ * succeeds.
+ */
+void igt_fork_hang_detector(int fd)
+{
+	struct stat st;
+
+	if (igt_only_list_subtests())
+		return;
+
+	igt_assert(fstat(fd, &st) == 0);
+
+	/* The handler must be in place before the helper can signal us. */
+	signal(SIGRTMAX, sig_abort);
+
+	igt_fork_helper(&hang_detector) {
+		/* Runs in the helper: its parent is the process to notify. */
+		const pid_t parent = getppid();
+
+		hang_detector_process(parent, st.st_rdev);
+	}
+}
+
+/**
+ * igt_stop_hang_detector:
+ *
+ * Stops the helper process started by igt_fork_hang_detector(). Does
+ * nothing when subtests are only being listed, since no helper was
+ * forked in that case.
+ */
+void igt_stop_hang_detector(void)
+{
+	if (!igt_only_list_subtests())
+		igt_stop_helper(&hang_detector);
+}
+#else
+/*
+ * Built without udev: GPU hang detection is unavailable, so both entry
+ * points are no-ops that merely keep callers building.
+ */
+void igt_fork_hang_detector(int fd)
+{
+	(void)fd; /* nothing to watch without udev */
+}
+
+void igt_stop_hang_detector(void)
+{
+	/* No helper was forked, so there is nothing to stop. */
+}
+#endif
+
/**
* igt_check_boolean_env_var:
* @env_var: environment variable name
diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 101fad12..cdaed297 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -40,6 +40,9 @@ extern int num_trash_bos;
void igt_fork_signal_helper(void);
void igt_stop_signal_helper(void);
+void igt_fork_hang_detector(int fd);
+void igt_stop_hang_detector(void);
+
struct igt_sigiter {
unsigned pass;
};