From 756f3e0cb7e7d7351e3eb955ca782a438c6aa887 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 22 Mar 2016 11:33:41 +0000
Subject: lib: Add a GPU error detector

If we listen to the uevents from the kernel, we can detect when the GPU
hangs. This requires us to fork a helper process to do so and send a
signal back to the parent.

Signed-off-by: Chris Wilson
---
Review notes (below the "---" marker, so not part of the commit message):

 * Line breaks and indentation were rebuilt from a flattened copy of this
   patch; the whitespace is reconstructed, not original.
 * The targets of the two added #include lines were missing. They are
   restored as <poll.h> (the POSIX header for poll(), struct pollfd and
   POLLIN) and <libudev.h> (which declares the udev_monitor_* and
   udev_device_* functions used below). TODO confirm against the
   committed lib/igt_aux.c.
 * The six context "#include" lines in the first igt_aux.c hunk are
   still missing their header names, and the author/sign-off e-mail
   addresses are missing too; restore them from the repository before
   trying to apply this patch.
 * hang_detector_process() polls with an infinite timeout, so its
   kill(pid, 0) parent-liveness check only runs after an event has been
   received from the monitor. It also leaves the loop, and exits, as
   soon as udev_monitor_receive_device() returns NULL.
 * The !HAVE_UDEV igt_fork_hang_detector() stub returns on both paths,
   so its igt_only_list_subtests() test has no effect.

 lib/Makefile.am | 12 +++++++--
 lib/igt_aux.c   | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_aux.h   |  3 +++
 3 files changed, 93 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/Makefile.am b/lib/Makefile.am
index a8a1eb6d..d2f2e16b 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -15,12 +15,20 @@ if HAVE_VC4
 endif
 
 AM_CPPFLAGS = -I$(top_srcdir)
-AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
+AM_CFLAGS = $(CWARNFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
 	-DIGT_SRCDIR=\""$(abs_top_srcdir)/tests"\" \
 	-DIGT_DATADIR=\""$(pkgdatadir)"\" \
 	-DIGT_LOG_DOMAIN=\""$(subst _,-,$*)"\" \
 	-pthread
 
-LDADD = $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
 AM_CFLAGS += $(CAIRO_CFLAGS)
 
+libintel_tools_la_LIBADD = \
+	$(DRM_LIBS) \
+	$(PCIACCESS_LIBS) \
+	$(CAIRO_LIBS) \
+	$(LIBUDEV_LIBS) \
+	$(LIBUNWIND_LIBS) \
+	$(TIMER_LIBS) \
+	-lm
+
diff --git a/lib/igt_aux.c b/lib/igt_aux.c
index b4c301e5..bfeaa168 100644
--- a/lib/igt_aux.c
+++ b/lib/igt_aux.c
@@ -42,6 +42,7 @@
 #include
 #include
 #include
+#include <poll.h>
 #include
 #include
 #include
@@ -359,6 +360,85 @@ void igt_stop_signal_helper(void)
 	sig_stat = 0;
 }
 
+#if HAVE_UDEV
+#include <libudev.h>
+
+static struct igt_helper_process hang_detector;
+static void __attribute__((noreturn))
+hang_detector_process(pid_t pid, dev_t rdev)
+{
+	struct udev_monitor *mon =
+		udev_monitor_new_from_netlink(udev_new(), "kernel");
+	struct pollfd pfd;
+
+	udev_monitor_filter_add_match_subsystem_devtype(mon, "drm", NULL);
+	udev_monitor_enable_receiving(mon);
+
+	pfd.fd = udev_monitor_get_fd(mon);
+	pfd.events = POLLIN;
+
+	while (poll(&pfd, 1, -1) > 0) {
+		struct udev_device *dev = udev_monitor_receive_device(mon);
+		dev_t devnum;
+
+		if (dev == NULL)
+			break;
+
+		devnum = udev_device_get_devnum(dev);
+		if (memcmp(&rdev, &devnum, sizeof(dev_t)) == 0) {
+			const char *str;
+
+			str = udev_device_get_property_value(dev, "ERROR");
+			if (str && atoi(str) == 1)
+				kill(pid, SIGRTMAX);
+		}
+
+		udev_device_unref(dev);
+		if (kill(pid, 0)) /* Parent has died, so must we. */
+			break;
+	}
+
+	exit(0);
+}
+
+static void sig_abort(int sig)
+{
+	igt_assert(!"GPU hung");
+}
+
+void igt_fork_hang_detector(int fd)
+{
+	struct stat st;
+
+	if (igt_only_list_subtests())
+		return;
+
+	igt_assert(fstat(fd, &st) == 0);
+
+	signal(SIGRTMAX, sig_abort);
+	igt_fork_helper(&hang_detector)
+		hang_detector_process(getppid(), st.st_rdev);
+}
+
+void igt_stop_hang_detector(void)
+{
+	if (igt_only_list_subtests())
+		return;
+
+	igt_stop_helper(&hang_detector);
+}
+#else
+void igt_fork_hang_detector(int fd)
+{
+	if (igt_only_list_subtests())
+		return;
+}
+
+void igt_stop_hang_detector(void)
+{
+}
+#endif
+
 /**
  * igt_check_boolean_env_var:
  * @env_var: environment variable name
diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 101fad12..cdaed297 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -40,6 +40,9 @@ extern int num_trash_bos;
 void igt_fork_signal_helper(void);
 void igt_stop_signal_helper(void);
 
+void igt_fork_hang_detector(int fd);
+void igt_stop_hang_detector(void);
+
 struct igt_sigiter {
 	unsigned pass;
 };
-- 
cgit v1.2.3