diff options
| -rw-r--r-- | tools/Makefile.am | 6 | ||||
| -rw-r--r-- | tools/intel_l3_parity.c | 46 | ||||
| -rw-r--r-- | tools/intel_l3_parity.h | 31 | ||||
| -rw-r--r-- | tools/intel_l3_udev_listener.c | 108 | 
4 files changed, 186 insertions, 5 deletions
| diff --git a/tools/Makefile.am b/tools/Makefile.am index 47bd5b3a..19810cf6 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -39,7 +39,7 @@ dist_bin_SCRIPTS = intel_gpu_abrt  AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib  AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) -LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) +LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS)  intel_dump_decode_SOURCES = 	\  	intel_dump_decode.c @@ -50,3 +50,7 @@ intel_error_decode_SOURCES =	\  intel_bios_reader_SOURCES =	\  	intel_bios_reader.c	\  	intel_bios.h + +intel_l3_parity_SOURCES =	\ +	intel_l3_parity.c	\ +	intel_l3_udev_listener.c diff --git a/tools/intel_l3_parity.c b/tools/intel_l3_parity.c index d2ad3c99..ead8fb57 100644 --- a/tools/intel_l3_parity.c +++ b/tools/intel_l3_parity.c @@ -37,6 +37,14 @@  #include "intel_chipset.h"  #include "intel_gpu_tools.h"  #include "drmtest.h" +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#if HAVE_UDEV +#include <libudev.h> +#include <syslog.h> +#endif +#include "intel_l3_parity.h"  static unsigned int devid;  /* L3 size is always a function of banks. 
The number of banks cannot be @@ -157,7 +165,8 @@ static void usage(const char *name)  		"  -r, --row=[row]			The row to act upon (default 0)\n"  		"  -b, --bank=[bank]			The bank to act upon (default 0)\n"  		"  -s, --subbank=[subbank]		The subbank to act upon (default 0)\n" -		"  -w, --slice=[slice]			Which slice to act on (default: -1 [all])" +		"  -w, --slice=[slice]			Which slice to act on (default: -1 [all])\n" +		"    , --daemon				Run the listener (-L) as a daemon\n"  		" ACTIONS (only 1 may be specified at a time):\n"  		"  -h, --help				Display this help\n"  		"  -H, --hw-info				Display the current L3 properties\n" @@ -166,7 +175,8 @@ static void usage(const char *name)  		"  -e, --enable				Enable row, bank, subbank (undo -d)\n"  		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"  		"  -i, --inject				[HSW only] Cause hardware to inject a row errors\n" -		"  -u, --uninject			[HSW only] Turn off hardware error injectection (undo -i)\n", +		"  -u, --uninject			[HSW only] Turn off hardware error injectection (undo -i)\n" +		"  -L, --listen				Listen for uevent errors\n",  		name);  } @@ -179,6 +189,7 @@ int main(int argc, char *argv[])  	int fd[REAL_MAX_SLICES] = {0}, ret, i;  	int action = '0';  	int drm_fd = drm_open_any(); +	int daemonize = 0;  	devid = intel_get_drm_devid(drm_fd);  	if (intel_gen(devid) < 7 || IS_VALLEYVIEW(devid)) @@ -202,11 +213,18 @@ int main(int argc, char *argv[])  		assert(lseek(fd[i], 0, SEEK_SET) == 0);  	} +	/* NB: It is potentially unsafe to read this register if the kernel is +	 * actively using this register range, or we're running multiple +	 * instances of this tool. Since neither of those cases should occur +	 * (and the tool should be root only) we can safely ignore this for +	 * now. Just be aware of this if for some reason a hang is reported +	 * when using this tool. 
+	 */  	dft = intel_register_read(0xb038);  	while (1) {  		int c, option_index = 0; -		static struct option long_options[] = { +		struct option long_options[] = {  			{ "help", no_argument, 0, 'h' },  			{ "list", no_argument, 0, 'l' },  			{ "clear-all", no_argument, 0, 'a' }, @@ -215,18 +233,23 @@ int main(int argc, char *argv[])  			{ "inject", no_argument, 0, 'i' },  			{ "uninject", no_argument, 0, 'u' },  			{ "hw-info", no_argument, 0, 'H' }, +			{ "listen", no_argument, 0, 'L' },  			{ "row", required_argument, 0, 'r' },  			{ "bank", required_argument, 0, 'b' },  			{ "subbank", required_argument, 0, 's' },  			{ "slice", required_argument, 0, 'w' }, +			{ "daemon", no_argument, &daemonize, 1 },  			{0, 0, 0, 0}  		}; -		c = getopt_long(argc, argv, "hHr:b:s:w:aled::iu", long_options, +		c = getopt_long(argc, argv, "hHr:b:s:w:aled::iuL", long_options,  				&option_index);  		if (c == -1)  			break; +		if (c == 0) +			continue; +  		switch (c) {  			case '?':  			case 'h': @@ -274,6 +297,7 @@ int main(int argc, char *argv[])  			case 'a':  			case 'l':  			case 'e': +			case 'L':  				if (action != '0') {  					fprintf(stderr, "Only one action may be specified\n");  					exit(EXIT_FAILURE); @@ -299,6 +323,20 @@ int main(int argc, char *argv[])  			printf("warning: overwriting existing injections. 
This is very dangerous.\n");  	} +	/* Daemon doesn't work like the other commands */ +	if (action == 'L') { +		struct l3_parity par; +		struct l3_location loc; +		if (daemonize) { +			assert(daemon(0, 0) == 0); +			openlog(argv[0], LOG_CONS | LOG_PID, LOG_USER); +		} +		memset(&par, 0, sizeof(par)); +		assert(l3_uevent_setup(&par) == 0); +		assert(l3_listen(&par, daemonize == 1, &loc) == 0); +		exit(EXIT_SUCCESS); +	} +  	if (action == 'l')  		decode_dft(dft); diff --git a/tools/intel_l3_parity.h b/tools/intel_l3_parity.h new file mode 100644 index 00000000..65697c4f --- /dev/null +++ b/tools/intel_l3_parity.h @@ -0,0 +1,31 @@ +#ifndef INTEL_L3_PARITY_H_ +#define INTEL_L3_PARITY_H_ + +#include <stdint.h> +#include <stdbool.h> + +struct l3_parity { +	struct udev *udev; +	struct udev_monitor *uevent_monitor; +	int fd; +	fd_set fdset; +}; + +struct l3_location { +	uint8_t slice; +	uint16_t row; +	uint8_t bank; +	uint8_t subbank; +}; + +#if HAVE_UDEV +int l3_uevent_setup(struct l3_parity *par); +/* Listens (blocks) for an l3 parity event. Returns the location of the error. 
*/
+int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc);
+#define l3_uevent_teardown(par) {}
+#else
+#define l3_uevent_setup(par) (-1)
+#define l3_listen(par, daemon, loc) (-1)
+#endif
+
+#endif
diff --git a/tools/intel_l3_udev_listener.c b/tools/intel_l3_udev_listener.c
new file mode 100644
index 00000000..c50820ca
--- /dev/null
+++ b/tools/intel_l3_udev_listener.c
@@ -0,0 +1,161 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if HAVE_UDEV
+#include <libudev.h>
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <syslog.h>
+#include <sys/select.h>
+#include "i915_drm.h"
+#include "intel_l3_parity.h"
+
+#ifndef I915_L3_PARITY_UEVENT
+#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
+#endif
+
+/*
+ * Create a udev monitor for "drm"/"drm_minor" events and store the udev
+ * context, the monitor, its file descriptor and a matching fd_set in par.
+ *
+ * Returns 0 on success. On failure a negative value is returned, par is left
+ * untouched and every object created here is released again.
+ */
+int l3_uevent_setup(struct l3_parity *par)
+{
+	struct udev *udev;
+	struct udev_monitor *uevent_monitor;
+	fd_set fdset;
+	int fd, ret = -1;
+
+	udev = udev_new();
+	if (!udev)
+		return -1;
+
+	uevent_monitor = udev_monitor_new_from_netlink(udev, "udev");
+	if (!uevent_monitor)
+		goto err_udev;
+
+	ret = udev_monitor_filter_add_match_subsystem_devtype(uevent_monitor, "drm", "drm_minor");
+	if (ret < 0)
+		goto err_monitor;
+
+	ret = udev_monitor_enable_receiving(uevent_monitor);
+	if (ret < 0)
+		goto err_monitor;
+
+	fd = udev_monitor_get_fd(uevent_monitor);
+	if (fd < 0) {
+		/* FD_SET() on a negative descriptor is undefined. */
+		ret = fd;
+		goto err_monitor;
+	}
+	FD_ZERO(&fdset);
+	FD_SET(fd, &fdset);
+
+	par->udev = udev;
+	par->fd = fd;
+	par->fdset = fdset;
+	par->uevent_monitor = uevent_monitor;
+	return 0;
+
+err_monitor:
+	udev_monitor_unref(uevent_monitor);
+err_udev:
+	udev_unref(udev);
+	return ret;
+}
+
+/*
+ * Read the integer property key of dev into *value.
+ * Returns false, leaving *value alone, when the event lacks the property.
+ */
+static bool l3_int_property(struct udev_device *dev, const char *key, int *value)
+{
+	const char *text = udev_device_get_property_value(dev, key);
+
+	if (!text)
+		return false;
+
+	*value = atoi(text);
+	return true;
+}
+
+/*
+ * Wait for L3 parity uevents on the monitor set up by l3_uevent_setup().
+ *
+ * Events that do not carry I915_L3_PARITY_UEVENT with a value starting with
+ * "1", or that lack any of SLICE, ROW, BANK or SUBBANK, are skipped.
+ *
+ * With daemon false the first parity event is stored in loc, reported on
+ * stderr and 0 is returned. With daemon true every parity event is stored in
+ * loc and reported through syslog, and the call only returns on failure.
+ *
+ * Returns select()'s result when that is <= 0, or -1 when no device could be
+ * received from the monitor.
+ */
+int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc)
+{
+	for (;;) {
+		struct udev_device *udev_dev;
+		const char *parity_status;
+		int slice = 0, row = 0, bank = 0, subbank = 0;
+		char err_msg[160];
+		bool parity_error;
+		int ret;
+
+		/* select() overwrites the set, so rebuild it for every wait. */
+		FD_ZERO(&par->fdset);
+		FD_SET(par->fd, &par->fdset);
+
+		ret = select(par->fd + 1, &par->fdset, NULL, NULL, NULL);
+		/* Number of bits set is returned, must be >= 1 */
+		if (ret <= 0)
+			return ret;
+
+		assert(FD_ISSET(par->fd, &par->fdset));
+
+		udev_dev = udev_monitor_receive_device(par->uevent_monitor);
+		if (!udev_dev)
+			return -1;
+
+		/* A missing property yields NULL, which must not reach strncmp(). */
+		parity_status = udev_device_get_property_value(udev_dev, I915_L3_PARITY_UEVENT);
+		parity_error = parity_status && !strncmp(parity_status, "1", 1) &&
+			l3_int_property(udev_dev, "SLICE", &slice) &&
+			l3_int_property(udev_dev, "ROW", &row) &&
+			l3_int_property(udev_dev, "BANK", &bank) &&
+			l3_int_property(udev_dev, "SUBBANK", &subbank);
+
+		/* All property strings are consumed; release the device on every path. */
+		udev_device_unref(udev_dev);
+		if (!parity_error)
+			continue;
+
+		loc->slice = slice;
+		loc->row = row;
+		loc->bank = bank;
+		loc->subbank = subbank;
+
+		snprintf(err_msg, sizeof(err_msg),
+			 "Parity error detected on: %d,%d,%d,%d. "
+			 "Try to run intel_l3_parity -r %d -b %d -s %d -w %d -d",
+			 loc->slice, loc->row, loc->bank, loc->subbank,
+			 loc->row, loc->bank, loc->subbank, loc->slice);
+		if (daemon) {
+			syslog(LOG_INFO, "%s\n", err_msg);
+			continue;
+		}
+
+		fprintf(stderr, "%s\n", err_msg);
+		return 0;
+	}
+}
+#endif
|
