summary | refs | log | tree | commit | diff
path: root/runner/executor.c
diff options
context:
space:
mode:
author: Petri Latvala <petri.latvala@intel.com> 2018-11-09 13:13:03 +0200
committer: Petri Latvala <petri.latvala@intel.com> 2018-11-15 10:35:23 +0200
commit: 111593c49d812a4f4ff9ab0ef053a3ab88a6f73f (patch)
tree: 7f8a34859645b3972bc587af663c7530abdec24a /runner/executor.c
parent: cab148ca3ec904a94d0cd43476cf7e1f8663f906 (diff)
runner: Implement --abort-on-monitored-error
Deviating a bit from the piglit command line flag, igt_runner takes an optional comma-separated list as an argument to --abort-on-monitored-error for the list of conditions to abort on. Without a list all possible conditions will be checked.

Two conditions implemented:
 - "taint" checks the kernel taint level for TAINT_PAGE, TAINT_DIE and TAINT_OOPS
 - "lockdep" checks the kernel lockdep status

Checking is done after every test binary execution, and if an abort condition is met, the reason is printed to stderr (unless log level is quiet) and the runner doesn't execute any further tests. Aborting between subtests (when running in --multiple-mode) is not done.

v2:
 - Remember to fclose
 - Taints are unsigned long (Chris)
 - Use getline instead of fgets (Chris)

v3:
 - Fix brainfart with lockdep

v4:
 - Rebase
 - Refactor the abort condition checking to pass down strings
 - Present the abort result in results.json as a pseudo test result
 - Unit tests for the pseudo result

v5:
 - Refactors (Chris)
 - Don't claim lockdep was triggered if debug_locks is not on anymore. Just say it's not active.
 - Dump lockdep_stats when aborting due to lockdep (Chris)
 - Use igt@runner@aborted instead for the pseudo result (Martin)

v6:
 - If aborting after a test, generate results.json. Like was already done for aborting at startup.
 - Print the test that would be executed next as well when aborting, as requested by Tomi.

v7:
 - Remove the resolved TODO item from commit message

Signed-off-by: Petri Latvala <petri.latvala@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Diffstat (limited to 'runner/executor.c')
-rw-r--r-- runner/executor.c | 200
1 files changed, 180 insertions, 20 deletions
diff --git a/runner/executor.c b/runner/executor.c
index 007b72ce..089e6312 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -108,6 +108,91 @@ static void ping_watchdogs(void)
}
}
+/*
+ * Abort-condition handler for lockdep.
+ *
+ * Reads /proc/lockdep_stats (a single read() into a 4096-byte stack
+ * buffer, after a fixed header) and looks for the " debug_locks:" line.
+ * If the integer parsed from that line is anything other than 1, returns
+ * a newly allocated string consisting of the "Lockdep not active" header
+ * followed by the lockdep_stats text that was read.
+ *
+ * Returns NULL when the file cannot be opened or read, when the
+ * debug_locks line is missing or unparsable, when debug_locks is 1, or
+ * when strdup() fails.
+ *
+ * The returned string is owned by the caller and must be free()d.
+ */
+static char *handle_lockdep(void)
+{
+	const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
+	const char *debug_locks_line = " debug_locks:";
+	const size_t header_len = strlen(header);
+	char buf[4096], *p;
+	ssize_t nread;
+	int fd, val;
+
+	fd = open("/proc/lockdep_stats", O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	memcpy(buf, header, header_len);
+
+	/* Leave one byte free for the terminating NUL. */
+	nread = read(fd, buf + header_len, sizeof(buf) - header_len - 1);
+
+	/* Release the descriptor on every path, including read() failure. */
+	close(fd);
+
+	if (nread < 0)
+		return NULL;
+
+	buf[header_len + nread] = '\0';
+
+	p = strstr(buf, debug_locks_line);
+	if (p == NULL)
+		return NULL;
+
+	if (sscanf(p + strlen(debug_locks_line), "%d", &val) != 1)
+		return NULL;
+
+	/* debug_locks == 1 means nothing to report. */
+	if (val == 1)
+		return NULL;
+
+	return strdup(buf);
+}
+
+/*
+ * Abort-condition handler for kernel taint.
+ *
+ * Reads the taint bitmask from /proc/sys/kernel/tainted. If the file
+ * cannot be opened or does not parse as an unsigned long, the mask is
+ * treated as 0 (not tainted).
+ *
+ * Returns a newly allocated "Kernel tainted (<all bits> -- <bad bits>)"
+ * string when any of the monitored bits is set, NULL otherwise or when
+ * the string cannot be allocated. The caller must free() the result.
+ */
+static char *handle_taint(void)
+{
+	const unsigned long bad_taints =
+		0x20 |	/* bit 5: TAINT_BAD_PAGE */
+		0x80 |	/* bit 7: TAINT_DIE */
+		0x200;	/* bit 9: TAINT_WARN */
+	unsigned long taints = 0;
+	char *reason = NULL;
+	FILE *f;
+
+	f = fopen("/proc/sys/kernel/tainted", "r");
+	if (f) {
+		/* A failed conversion must not leave a partial value behind. */
+		if (fscanf(f, "%lu", &taints) != 1)
+			taints = 0;
+		fclose(f);
+	}
+
+	if (!(taints & bad_taints))
+		return NULL;
+
+	/*
+	 * On failure asprintf() leaves the pointer contents undefined,
+	 * so never hand that value back to the caller.
+	 */
+	if (asprintf(&reason,
+		     "Kernel tainted (%#lx -- %lx)",
+		     taints, taints & bad_taints) < 0)
+		return NULL;
+
+	return reason;
+}
+
+/*
+ * Table of abort conditions and the function that checks each one.
+ *
+ * A handler returns a heap-allocated reason string when its condition
+ * has triggered, or NULL when it has not. The table is walked in order
+ * and ends at the entry whose condition is 0.
+ */
+static const struct {
+	int condition;
+	char *(*handler)(void);
+} abort_handlers[] = {
+	{ .condition = ABORT_LOCKDEP, .handler = handle_lockdep },
+	{ .condition = ABORT_TAINT, .handler = handle_taint },
+	{ .condition = 0, .handler = NULL }, /* sentinel */
+};
+
+/*
+ * Runs every abort handler whose condition bit is set in
+ * settings->abort_mask, in table order, and stops at the first one that
+ * reports a reason.
+ *
+ * The reason is echoed to stderr when the log level is at least
+ * LOG_LEVEL_NORMAL. Returns the handler's reason string (to be free()d
+ * by the caller), or NULL if no enabled handler triggered.
+ */
+static char *need_to_abort(const struct settings* settings)
+{
+	typeof(*abort_handlers) *cur = abort_handlers;
+
+	while (cur->condition) {
+		char *reason = NULL;
+
+		/* Only consult handlers the user asked for. */
+		if (settings->abort_mask & cur->condition)
+			reason = cur->handler();
+
+		if (reason) {
+			if (settings->log_level >= LOG_LEVEL_NORMAL)
+				fprintf(stderr, "Aborting: %s\n", reason);
+
+			return reason;
+		}
+
+		cur++;
+	}
+
+	return NULL;
+}
+
static void prune_subtest(struct job_list_entry *entry, char *subtest)
{
char *excl;
@@ -714,6 +799,37 @@ static void print_time_left(struct execute_state *state,
printf("(%*.0fs left) ", width, state->time_left);
}
+/*
+ * Builds the human-readable name of a job list entry.
+ *
+ * The result is the binary name alone when the entry has no subtests,
+ * otherwise "binary (subtest1, subtest2, ...)".
+ *
+ * Returns a newly allocated string the caller must free(), or NULL if
+ * the allocation fails.
+ */
+static char *entry_display_name(struct job_list_entry *entry)
+{
+	const char *delim = ", ";
+	const size_t delim_len = strlen(delim);
+	const size_t binary_len = strlen(entry->binary);
+	size_t size = binary_len + 1; /* binary name + NUL */
+	size_t i;
+	char *ret, *cursor;
+
+	/* First pass: work out the exact size so one allocation suffices. */
+	if (entry->subtest_count > 0) {
+		size += 3; /* strlen(" (") + strlen(")") */
+		size += (entry->subtest_count - 1) * delim_len;
+
+		for (i = 0; i < entry->subtest_count; i++)
+			size += strlen(entry->subtests[i]);
+	}
+
+	ret = malloc(size);
+	if (!ret)
+		return NULL;
+
+	/* Second pass: append through a cursor instead of rescanning. */
+	cursor = ret;
+	memcpy(cursor, entry->binary, binary_len);
+	cursor += binary_len;
+
+	if (entry->subtest_count > 0) {
+		memcpy(cursor, " (", 2);
+		cursor += 2;
+
+		for (i = 0; i < entry->subtest_count; i++) {
+			size_t len = strlen(entry->subtests[i]);
+
+			if (i > 0) {
+				memcpy(cursor, delim, delim_len);
+				cursor += delim_len;
+			}
+
+			memcpy(cursor, entry->subtests[i], len);
+			cursor += len;
+		}
+
+		*cursor++ = ')';
+	}
+
+	*cursor = '\0';
+
+	return ret;
+}
+
/*
* Returns:
* =0 - Success
@@ -797,24 +913,15 @@ static int execute_next_entry(struct execute_state *state,
}
if (settings->log_level >= LOG_LEVEL_NORMAL) {
+ char *displayname;
int width = digits(total);
printf("[%0*zd/%0*zd] ", width, idx + 1, width, total);
print_time_left(state, settings);
- printf("%s", entry->binary);
-
- if (entry->subtest_count > 0) {
- size_t i;
- const char *delim = "";
-
- printf(" (");
- for (i = 0; i < entry->subtest_count; i++) {
- printf("%s%s", delim, entry->subtests[i]);
- delim = ", ";
- }
- printf(")");
- }
+ displayname = entry_display_name(entry);
+ printf("%s", displayname);
+ free(displayname);
printf("\n");
}
@@ -896,7 +1003,8 @@ static bool clear_old_results(char *path)
if (remove_file(dirfd, "uname.txt") ||
remove_file(dirfd, "starttime.txt") ||
- remove_file(dirfd, "endtime.txt")) {
+ remove_file(dirfd, "endtime.txt") ||
+ remove_file(dirfd, "aborted.txt")) {
close(dirfd);
fprintf(stderr, "Error clearing old results: %s\n", strerror(errno));
return false;
@@ -957,6 +1065,7 @@ bool initialize_execute_state_from_resume(int dirfd,
free_settings(settings);
free_job_list(list);
memset(state, 0, sizeof(*state));
+ state->resuming = true;
if (!read_settings(settings, dirfd) ||
!read_job_list(list, dirfd)) {
@@ -1044,6 +1153,27 @@ static bool overall_timeout_exceeded(struct execute_state *state)
return state->time_left == 0.0;
}
+/*
+ * Records an abort in "aborted.txt" inside the results directory.
+ *
+ * The file holds an "Aborting." line, the previous and next test names,
+ * a blank line and then the abort reason verbatim.
+ *
+ * @resdirfd:   directory fd the file is created relative to
+ * @reason:     abort reason text, written as-is
+ * @testbefore: display name of the test that ran before the abort
+ * @testafter:  display name of the test that would have run next
+ *
+ * Best effort: nothing is reported to the caller on failure.
+ */
+static void write_abort_file(int resdirfd,
+			     const char *reason,
+			     const char *testbefore,
+			     const char *testafter)
+{
+	size_t left = strlen(reason);
+	int abortfd;
+
+	/*
+	 * O_EXCL makes the open fail if the file already exists. Ignore
+	 * failure to open, there's already an abort probably (if this
+	 * is a resume).
+	 */
+	abortfd = openat(resdirfd, "aborted.txt", O_CREAT | O_WRONLY | O_EXCL, 0666);
+	if (abortfd < 0)
+		return;
+
+	dprintf(abortfd, "Aborting.\n");
+	dprintf(abortfd, "Previous test: %s\n", testbefore);
+	dprintf(abortfd, "Next test: %s\n\n", testafter);
+
+	/* write() may be short or interrupted; keep going until done. */
+	while (left > 0) {
+		ssize_t n = write(abortfd, reason, left);
+
+		if (n < 0 && errno == EINTR)
+			continue;
+		if (n <= 0)
+			break;
+
+		reason += n;
+		left -= (size_t)n;
+	}
+
+	close(abortfd);
+}
+
bool execute(struct execute_state *state,
struct settings *settings,
struct job_list *job_list)
@@ -1100,14 +1230,31 @@ bool execute(struct execute_state *state,
}
close(unamefd);
+ /* Check if we're already in abort-state at bootup */
+ if (!state->resuming) {
+ char *reason;
+
+ if ((reason = need_to_abort(settings)) != NULL) {
+ char *nexttest = entry_display_name(&job_list->entries[state->next]);
+ write_abort_file(resdirfd, reason, "nothing", nexttest);
+ free(reason);
+ free(nexttest);
+
+ goto end;
+ }
+ }
+
for (; state->next < job_list->size;
state->next++) {
- int result = execute_next_entry(state,
- job_list->size,
- &time_spent,
- settings,
- &job_list->entries[state->next],
- testdirfd, resdirfd);
+ char *reason;
+ int result;
+
+ result = execute_next_entry(state,
+ job_list->size,
+ &time_spent,
+ settings,
+ &job_list->entries[state->next],
+ testdirfd, resdirfd);
if (result < 0) {
status = false;
@@ -1124,6 +1271,18 @@ bool execute(struct execute_state *state,
break;
}
+ if ((reason = need_to_abort(settings)) != NULL) {
+ char *prev = entry_display_name(&job_list->entries[state->next]);
+ char *next = (state->next + 1 < job_list->size ?
+ entry_display_name(&job_list->entries[state->next + 1]) :
+ strdup("nothing"));
+ write_abort_file(resdirfd, reason, prev, next);
+ free(prev);
+ free(next);
+ free(reason);
+ break;
+ }
+
if (result > 0) {
double time_left = state->time_left;
@@ -1140,6 +1299,7 @@ bool execute(struct execute_state *state,
close(timefd);
}
+ end:
close(testdirfd);
close(resdirfd);
close_watchdogs(settings);