summaryrefslogtreecommitdiff
path: root/runner/executor.c
diff options
context:
space:
mode:
authorPetri Latvala <petri.latvala@intel.com>2018-08-08 14:07:00 +0300
committerPetri Latvala <petri.latvala@intel.com>2018-08-09 10:33:32 +0300
commit18c1e7525591b98b53321c26464f3181a5f7cce1 (patch)
treeb2752ef34feaa0a1b729029550298f7615e2fdf1 /runner/executor.c
parentf0bec8572bfc0960841435155002455fc1dabd67 (diff)
runner: New test runner
This is a new test runner to replace piglit. Piglit has been very useful as a test runner, but certain improvements have been very difficult if possible at all in a generic test running framework. Important improvements over piglit: - Faster to launch. Being able to make assumptions about what we're executing makes it possible to save significant amounts of time. For example, a testlist file's line "igt@somebinary@somesubtest" already has all the information we need to construct the correct command line to execute that particular subtest, instead of listing all subtests of all test binaries and mapping them to command lines. Same goes for the regexp filters command line flags -t and -x; If we use -x somebinaryname, we don't need to list subtests from somebinaryname, we already know none of them will get executed. - Logs of incomplete tests. Piglit collects test output to memory and dumps them to a file when the test is complete. The new runner writes all output to disk immediately. - Ability to execute multiple subtests in one binary execution. This was possible with piglit, but its semantics made it very hard to implement in practice. For example, having a testlist file not only selected a subset of tests to run, but also mandated that they be executed in the same order. - Flexible timeout support. Instead of mandating a time tests cannot exceed, the new runner has a timeout on inactivity. Activity is any output on the test's stdout or stderr, or kernel activity via /dev/kmsg. The runner is fairly piglit compatible. The command line is very similar, with a few additions. IGT_TEST_ROOT environment flag is still supported, but can also be set via command line (in place of igt.py in piglit command line). The results are a set of log files, processed into a piglit-compatible results.json file (BZ2 compression TODO). There are some new fields in the json for extra information: - "igt-version" contains the IGT version line. In multiple-subtests-mode the version information is only printed once, so it needs to be duplicated to all subtest results this way. - "dmesg-warnings" contains the dmesg lines that triggered a dmesg-warn/dmesg-fail state. - Runtime information will be different. Piglit takes a timestamp at the beginning and at the end of execution for runtime. The new runner uses the subtest output text. The binary execution time will also be included; The key "igt@somebinary" will have the runtime of the binary "somebinary", whereas "igt@somebinary@a" etc will have the runtime of the subtests. Substracting the subtest runtimes from the binary runtime yields the total time spent doing setup in igt_fixture blocks. v2: - use clock handling from igt_core instead of copypaste - install results binary - less magic numbers - scanf doesn't give empty strings after all - use designated array initialization with _F_JOURNAL and pals - add more comments to dump_dmesg - use signal in kill_child instead of bool - use more 'usual' return values for execute_entry - use signal number instead of magic integers - use IGT_EXIT_INVALID instead of magic 79 - properly remove files in clear_test_result_directory() - remove magic numbers - warn if results directory contains extra files - fix naming in matches_any - construct command line in a cleaner way in add_subtests() - clarify error in filtered_job_list - replace single string fprintfs with fputs - use getline() more sanely - refactor string constants to a shared header - explain non-nul-terminated string handling in resultgen - saner line parsing - rename gen_igt_name to generate_piglit_name - clean up parse_result_string - explain what we're parsing in resultgen - explain the runtime accumulation in add_runtime - refactor result overriding - stop passing needle sizes to find_line functions - refactor stdout/stderr parsing - fix regex whitelist compiling - add TODO for suppressions.txt - refactor dmesg parsing - fill_from_journal returns void - explain missing result fields with TODO comments - log_level parsing with typeof - pass stdout/stderr to usage() instead of a bool - fix absolute_path overflow - refactor settings serialization - remove maybe_strdup function - refactor job list serialization - refactor resuming, add new resume binary - catch mmap failure correctly v3: - rename runner to igt_runner, etc - add meson option for building the runner - use UPPER_CASE names for string constants - add TODO comments for future refactoring - add a midding close() - const correctness where applicable - also build with autotools Signed-off-by: Petri Latvala <petri.latvala@intel.com> Reviewed-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Diffstat (limited to 'runner/executor.c')
-rw-r--r--runner/executor.c1022
1 files changed, 1022 insertions, 0 deletions
diff --git a/runner/executor.c b/runner/executor.c
new file mode 100644
index 00000000..60cf4da5
--- /dev/null
+++ b/runner/executor.c
@@ -0,0 +1,1022 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/watchdog.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "igt_core.h"
+#include "executor.h"
+#include "output_strings.h"
+
+static struct {
+ int *fds;
+ size_t num_dogs;
+} watchdogs;
+
+static void close_watchdogs(struct settings *settings)
+{
+ size_t i;
+
+ if (settings && settings->log_level >= LOG_LEVEL_VERBOSE)
+ printf("Closing watchdogs\n");
+
+ for (i = 0; i < watchdogs.num_dogs; i++) {
+ write(watchdogs.fds[i], "V", 1);
+ close(watchdogs.fds[i]);
+ }
+}
+
+static void close_watchdogs_atexit(void)
+{
+ close_watchdogs(NULL);
+}
+
+static void init_watchdogs(struct settings *settings)
+{
+ int i;
+ char name[32];
+ int fd;
+
+ memset(&watchdogs, 0, sizeof(watchdogs));
+
+ if (!settings->use_watchdog || settings->inactivity_timeout <= 0)
+ return;
+
+ if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+ printf("Initializing watchdogs\n");
+ }
+
+ atexit(close_watchdogs_atexit);
+
+ for (i = 0; ; i++) {
+ snprintf(name, sizeof(name), "/dev/watchdog%d", i);
+ if ((fd = open(name, O_RDWR | O_CLOEXEC)) < 0)
+ break;
+
+ watchdogs.num_dogs++;
+ watchdogs.fds = realloc(watchdogs.fds, watchdogs.num_dogs * sizeof(int));
+ watchdogs.fds[i] = fd;
+
+ if (settings->log_level >= LOG_LEVEL_VERBOSE)
+ printf(" %s\n", name);
+ }
+}
+
+static int watchdogs_set_timeout(int timeout)
+{
+ size_t i;
+ int orig_timeout = timeout;
+
+ for (i = 0; i < watchdogs.num_dogs; i++) {
+ if (ioctl(watchdogs.fds[i], WDIOC_SETTIMEOUT, &timeout)) {
+ write(watchdogs.fds[i], "V", 1);
+ close(watchdogs.fds[i]);
+ watchdogs.fds[i] = -1;
+ continue;
+ }
+
+ if (timeout < orig_timeout) {
+ /*
+ * Timeout of this caliber refused. We want to
+ * use the same timeout for all devices.
+ */
+ return watchdogs_set_timeout(timeout);
+ }
+ }
+
+ return timeout;
+}
+
+static void ping_watchdogs(void)
+{
+ size_t i;
+
+ for (i = 0; i < watchdogs.num_dogs; i++) {
+ ioctl(watchdogs.fds[i], WDIOC_KEEPALIVE, 0);
+ }
+}
+
+static void prune_subtest(struct job_list_entry *entry, char *subtest)
+{
+ char *excl;
+
+ /*
+ * Subtest pruning is done by adding exclusion strings to the
+ * subtest list. The last matching item on the subtest
+ * selection command line flag decides whether to run a
+ * subtest, see igt_core.c for details. If the list is empty,
+ * the expected subtest set is unknown, so we need to add '*'
+ * first so we can start excluding.
+ */
+
+ if (entry->subtest_count == 0) {
+ entry->subtest_count++;
+ entry->subtests = realloc(entry->subtests, entry->subtest_count * sizeof(*entry->subtests));
+ entry->subtests[0] = strdup("*");
+ }
+
+ excl = malloc(strlen(subtest) + 2);
+ excl[0] = '!';
+ strcpy(excl + 1, subtest);
+
+ entry->subtest_count++;
+ entry->subtests = realloc(entry->subtests, entry->subtest_count * sizeof(*entry->subtests));
+ entry->subtests[entry->subtest_count - 1] = excl;
+}
+
+static bool prune_from_journal(struct job_list_entry *entry, int fd)
+{
+ char *subtest;
+ FILE *f;
+ bool any_pruned = false;
+
+ /*
+ * Each journal line is a subtest that has been started, or
+ * the line 'exit:$exitcode (time)', or 'timeout:$exitcode (time)'.
+ */
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return false;
+
+ while (fscanf(f, "%ms", &subtest) == 1) {
+ if (!strncmp(subtest, EXECUTOR_EXIT, strlen(EXECUTOR_EXIT))) {
+ /* Fully done. Mark that by making the binary name invalid. */
+ fscanf(f, " (%*fs)");
+ entry->binary[0] = '\0';
+ free(subtest);
+ continue;
+ }
+
+ if (!strncmp(subtest, EXECUTOR_TIMEOUT, strlen(EXECUTOR_TIMEOUT))) {
+ fscanf(f, " (%*fs)");
+ free(subtest);
+ continue;
+ }
+
+ prune_subtest(entry, subtest);
+
+ free(subtest);
+ any_pruned = true;
+ }
+
+ fclose(f);
+ return any_pruned;
+}
+
+static const char *filenames[_F_LAST] = {
+ [_F_JOURNAL] = "journal.txt",
+ [_F_OUT] = "out.txt",
+ [_F_ERR] = "err.txt",
+ [_F_DMESG] = "dmesg.txt",
+};
+
+static int open_at_end(int dirfd, const char *name)
+{
+ int fd = openat(dirfd, name, O_RDWR | O_CREAT | O_CLOEXEC, 0666);
+ char last;
+
+ if (fd >= 0) {
+ if (lseek(fd, -1, SEEK_END) >= 0 &&
+ read(fd, &last, 1) == 1 &&
+ last != '\n') {
+ write(fd, "\n", 1);
+ }
+ lseek(fd, 0, SEEK_END);
+ }
+
+ return fd;
+}
+
+static int open_for_reading(int dirfd, const char *name)
+{
+ return openat(dirfd, name, O_RDONLY);
+}
+
+bool open_output_files(int dirfd, int *fds, bool write)
+{
+ int i;
+ int (*openfunc)(int, const char*) = write ? open_at_end : open_for_reading;
+
+ for (i = 0; i < _F_LAST; i++) {
+ if ((fds[i] = openfunc(dirfd, filenames[i])) < 0) {
+ while (--i >= 0)
+ close(fds[i]);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void close_outputs(int *fds)
+{
+ int i;
+
+ for (i = 0; i < _F_LAST; i++) {
+ close(fds[i]);
+ }
+}
+
+static void dump_dmesg(int kmsgfd, int outfd)
+{
+ /*
+ * Write kernel messages to the log file until we reach
+ * 'now'. Unfortunately, /dev/kmsg doesn't support seeking to
+ * -1 from SEEK_END so we need to use a second fd to read a
+ * message to match against, or stop when we reach EAGAIN.
+ */
+
+ int comparefd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);
+ unsigned flags;
+ unsigned long long seq, cmpseq, usec;
+ char cont;
+ char buf[256];
+ ssize_t r;
+
+ if (comparefd < 0)
+ return;
+
+ if (fcntl(kmsgfd, F_SETFL, O_NONBLOCK))
+ return;
+
+ while (1) {
+ if (comparefd >= 0) {
+ r = read(comparefd, buf, sizeof(buf) - 1);
+ if (r < 0) {
+ if (errno != EAGAIN && errno != EPIPE)
+ return;
+ } else {
+ buf[r] = '\0';
+ if (sscanf(buf, "%u,%llu,%llu,%c;",
+ &flags, &cmpseq, &usec, &cont) == 4) {
+ /* Reading comparison record done. */
+ close(comparefd);
+ comparefd = -1;
+ }
+ }
+ }
+
+ r = read(kmsgfd, buf, sizeof(buf));
+ if (r <= 0) {
+ if (errno == EPIPE)
+ continue;
+
+ /*
+ * If EAGAIN, we're done. If some other error,
+ * we can't do anything anyway.
+ */
+ return;
+ }
+
+ write(outfd, buf, r);
+
+ if (comparefd < 0 && sscanf(buf, "%u,%llu,%llu,%c;",
+ &flags, &seq, &usec, &cont) == 4) {
+ /*
+ * Comparison record has been read, compare
+ * the sequence number to see if we have read
+ * enough.
+ */
+ if (seq >= cmpseq)
+ return;
+ }
+ }
+}
+
+static bool kill_child(int sig, pid_t child)
+{
+ /*
+ * Send the signal to the child directly, and to the child's
+ * process group.
+ */
+ kill(-child, sig);
+ if (kill(child, sig) && errno == ESRCH) {
+ fprintf(stderr, "Child process does not exist. This shouldn't happen.\n");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Returns:
+ * =0 - Success
+ * <0 - Failure executing
+ * >0 - Timeout happened, need to recreate from journal
+ */
+static int monitor_output(pid_t child,
+ int outfd, int errfd, int kmsgfd, int sigfd,
+ int *outputs,
+ struct settings *settings)
+{
+ fd_set set;
+ char buf[256];
+ char *outbuf = NULL;
+ size_t outbufsize = 0;
+ char current_subtest[256] = {};
+ struct signalfd_siginfo siginfo;
+ ssize_t s;
+ int n, status;
+ int nfds = outfd;
+ int timeout = settings->inactivity_timeout;
+ int timeout_intervals = 1, intervals_left;
+ int wd_extra = 10;
+ int killed = 0; /* 0 if not killed, signal number otherwise */
+ struct timespec time_beg, time_end;
+ bool aborting = false;
+
+ igt_gettime(&time_beg);
+
+ if (errfd > nfds)
+ nfds = errfd;
+ if (kmsgfd > nfds)
+ nfds = kmsgfd;
+ if (sigfd > nfds)
+ nfds = sigfd;
+ nfds++;
+
+ if (timeout > 0) {
+ /*
+ * Use original timeout plus some leeway. If we're still
+ * alive, we want to kill the test process instead of cutting
+ * power.
+ */
+ int wd_timeout = watchdogs_set_timeout(timeout + wd_extra);
+
+ if (wd_timeout < timeout + wd_extra) {
+ /* Watchdog timeout smaller, so ping it more often */
+ if (wd_timeout - wd_extra < 0)
+ wd_extra = wd_timeout / 2;
+ timeout_intervals = timeout / (wd_timeout - wd_extra);
+ intervals_left = timeout_intervals;
+ timeout /= timeout_intervals;
+
+ if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+ printf("Watchdog doesn't support the timeout we requested (shortened to %d seconds).\n"
+ "Using %d intervals of %d seconds.\n",
+ wd_timeout, timeout_intervals, timeout);
+ }
+ }
+ }
+
+ while (outfd >= 0 || errfd >= 0 || sigfd >= 0) {
+ struct timeval tv = { .tv_sec = timeout };
+
+ FD_ZERO(&set);
+ if (outfd >= 0)
+ FD_SET(outfd, &set);
+ if (errfd >= 0)
+ FD_SET(errfd, &set);
+ if (kmsgfd >= 0)
+ FD_SET(kmsgfd, &set);
+ if (sigfd >= 0)
+ FD_SET(sigfd, &set);
+
+ n = select(nfds, &set, NULL, NULL, timeout == 0 ? NULL : &tv);
+ if (n < 0) {
+ /* TODO */
+ return -1;
+ }
+
+ if (n == 0) {
+ intervals_left--;
+ if (intervals_left) {
+ continue;
+ }
+
+ ping_watchdogs();
+
+ switch (killed) {
+ case 0:
+ if (settings->log_level >= LOG_LEVEL_NORMAL) {
+ printf("Timeout. Killing the current test with SIGTERM.\n");
+ }
+
+ killed = SIGTERM;
+ if (!kill_child(killed, child))
+ return -1;
+
+ /*
+ * Now continue the loop and let the
+ * dying child be handled normally.
+ */
+ timeout = 2; /* Timeout for waiting selected by fair dice roll. */
+ watchdogs_set_timeout(20);
+ intervals_left = timeout_intervals = 1;
+ break;
+ case SIGTERM:
+ if (settings->log_level >= LOG_LEVEL_NORMAL) {
+ printf("Timeout. Killing the current test with SIGKILL.\n");
+ }
+
+ killed = SIGKILL;
+ if (!kill_child(killed, child))
+ return -1;
+
+ intervals_left = timeout_intervals = 1;
+ break;
+ case SIGKILL:
+ /* Nothing that can be done, really. Let's tell the caller we want to abort. */
+ if (settings->log_level >= LOG_LEVEL_NORMAL) {
+ fprintf(stderr, "Child refuses to die. Aborting.\n");
+ }
+ close_watchdogs(settings);
+ free(outbuf);
+ close(outfd);
+ close(errfd);
+ close(kmsgfd);
+ close(sigfd);
+ return -1;
+ }
+
+ continue;
+ }
+
+ intervals_left = timeout_intervals;
+ ping_watchdogs();
+
+ /* TODO: Refactor these handlers to their own functions */
+ if (outfd >= 0 && FD_ISSET(outfd, &set)) {
+ char *newline;
+
+ s = read(outfd, buf, sizeof(buf));
+ if (s <= 0) {
+ if (s < 0) {
+ fprintf(stderr, "Error reading test's stdout: %s\n",
+ strerror(errno));
+ }
+
+ close(outfd);
+ outfd = -1;
+ goto out_end;
+ }
+
+ write(outputs[_F_OUT], buf, s);
+ if (settings->sync) {
+ fdatasync(outputs[_F_OUT]);
+ }
+
+ outbuf = realloc(outbuf, outbufsize + s);
+ memcpy(outbuf + outbufsize, buf, s);
+ outbufsize += s;
+
+ while ((newline = memchr(outbuf, '\n', outbufsize)) != NULL) {
+ size_t linelen = newline - outbuf + 1;
+
+ if (linelen > strlen(STARTING_SUBTEST) &&
+ !memcmp(outbuf, STARTING_SUBTEST, strlen(STARTING_SUBTEST))) {
+ write(outputs[_F_JOURNAL], outbuf + strlen(STARTING_SUBTEST),
+ linelen - strlen(STARTING_SUBTEST));
+ memcpy(current_subtest, outbuf + strlen(STARTING_SUBTEST),
+ linelen - strlen(STARTING_SUBTEST));
+ current_subtest[linelen - strlen(STARTING_SUBTEST)] = '\0';
+
+ if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+ fwrite(outbuf, 1, linelen, stdout);
+ }
+ }
+ if (linelen > strlen(SUBTEST_RESULT) &&
+ !memcmp(outbuf, SUBTEST_RESULT, strlen(SUBTEST_RESULT))) {
+ char *delim = memchr(outbuf, ':', linelen);
+
+ if (delim != NULL) {
+ size_t subtestlen = delim - outbuf - strlen(SUBTEST_RESULT);
+ if (memcmp(current_subtest, outbuf + strlen(SUBTEST_RESULT),
+ subtestlen)) {
+ /* Result for a test that didn't ever start */
+ write(outputs[_F_JOURNAL],
+ outbuf + strlen(SUBTEST_RESULT),
+ subtestlen);
+ write(outputs[_F_JOURNAL], "\n", 1);
+ if (settings->sync) {
+ fdatasync(outputs[_F_JOURNAL]);
+ }
+ current_subtest[0] = '\0';
+ }
+
+ if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+ fwrite(outbuf, 1, linelen, stdout);
+ }
+ }
+ }
+
+ memmove(outbuf, newline + 1, outbufsize - linelen);
+ outbufsize -= linelen;
+ }
+ }
+ out_end:
+
+ if (errfd >= 0 && FD_ISSET(errfd, &set)) {
+ s = read(errfd, buf, sizeof(buf));
+ if (s <= 0) {
+ if (s < 0) {
+ fprintf(stderr, "Error reading test's stderr: %s\n",
+ strerror(errno));
+ }
+ close(errfd);
+ errfd = -1;
+ } else {
+ write(outputs[_F_ERR], buf, s);
+ if (settings->sync) {
+ fdatasync(outputs[_F_ERR]);
+ }
+ }
+ }
+
+ if (kmsgfd >= 0 && FD_ISSET(kmsgfd, &set)) {
+ s = read(kmsgfd, buf, sizeof(buf));
+ if (s < 0) {
+ if (errno != EPIPE) {
+ fprintf(stderr, "Error reading from kmsg, stopping monitoring: %s\n",
+ strerror(errno));
+ close(kmsgfd);
+ kmsgfd = -1;
+ }
+ } else {
+ write(outputs[_F_DMESG], buf, s);
+ if (settings->sync) {
+ fdatasync(outputs[_F_DMESG]);
+ }
+ }
+ }
+
+ if (sigfd >= 0 && FD_ISSET(sigfd, &set)) {
+ double time;
+
+ s = read(sigfd, &siginfo, sizeof(siginfo));
+ if (s < 0) {
+ fprintf(stderr, "Error reading from signalfd: %s\n",
+ strerror(errno));
+ continue;
+ } else if (siginfo.ssi_signo == SIGCHLD) {
+ if (child != waitpid(child, &status, WNOHANG)) {
+ fprintf(stderr, "Failed to reap child\n");
+ status = 9999;
+ } else if (WIFEXITED(status)) {
+ status = WEXITSTATUS(status);
+ if (status >= 128) {
+ status = 128 - status;
+ }
+ } else if (WIFSIGNALED(status)) {
+ status = -WTERMSIG(status);
+ } else {
+ status = 9999;
+ }
+ } else {
+ /* We're dying, so we're taking them with us */
+ if (settings->log_level >= LOG_LEVEL_NORMAL)
+ printf("Abort requested, terminating children\n");
+
+ aborting = true;
+ timeout = 2;
+ killed = SIGTERM;
+ if (!kill_child(killed, child))
+ return -1;
+
+ continue;
+ }
+
+ igt_gettime(&time_end);
+
+ time = igt_time_elapsed(&time_beg, &time_end);
+ if (time < 0.0)
+ time = 0.0;
+
+ if (!aborting) {
+ dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n",
+ killed ? EXECUTOR_TIMEOUT : EXECUTOR_EXIT,
+ status, time);
+ if (settings->sync) {
+ fdatasync(outputs[_F_JOURNAL]);
+ }
+ }
+
+ close(sigfd);
+ sigfd = -1;
+ child = 0;
+ }
+ }
+
+ dump_dmesg(kmsgfd, outputs[_F_DMESG]);
+ if (settings->sync)
+ fdatasync(outputs[_F_DMESG]);
+
+ free(outbuf);
+ close(outfd);
+ close(errfd);
+ close(kmsgfd);
+ close(sigfd);
+
+ if (aborting)
+ return -1;
+
+ return killed;
+}
+
+static void execute_test_process(int outfd, int errfd,
+ struct settings *settings,
+ struct job_list_entry *entry)
+{
+ char *argv[4] = {};
+ size_t rootlen;
+
+ dup2(outfd, STDOUT_FILENO);
+ dup2(errfd, STDERR_FILENO);
+
+ setpgid(0, 0);
+
+ rootlen = strlen(settings->test_root);
+ argv[0] = malloc(rootlen + strlen(entry->binary) + 2);
+ strcpy(argv[0], settings->test_root);
+ argv[0][rootlen] = '/';
+ strcpy(argv[0] + rootlen + 1, entry->binary);
+
+ if (entry->subtest_count) {
+ size_t argsize;
+ size_t i;
+
+ argv[1] = strdup("--run-subtest");
+ argsize = strlen(entry->subtests[0]);
+ argv[2] = malloc(argsize + 1);
+ strcpy(argv[2], entry->subtests[0]);
+
+ for (i = 1; i < entry->subtest_count; i++) {
+ char *sub = entry->subtests[i];
+ size_t sublen = strlen(sub);
+
+ argv[2] = realloc(argv[2], argsize + sublen + 2);
+ argv[2][argsize] = ',';
+ strcpy(argv[2] + argsize + 1, sub);
+ argsize += sublen + 1;
+ }
+ }
+
+ execv(argv[0], argv);
+ fprintf(stderr, "Cannot execute %s\n", argv[0]);
+ exit(IGT_EXIT_INVALID);
+}
+
+static int digits(size_t num)
+{
+ int ret = 0;
+ while (num) {
+ num /= 10;
+ ret++;
+ }
+
+ if (ret == 0) ret++;
+ return ret;
+}
+
+/*
+ * Returns:
+ * =0 - Success
+ * <0 - Failure executing
+ * >0 - Timeout happened, need to recreate from journal
+ */
+static int execute_entry(size_t idx,
+ size_t total,
+ struct settings *settings,
+ struct job_list_entry *entry,
+ int testdirfd, int resdirfd)
+{
+ int dirfd;
+ int outputs[_F_LAST];
+ int kmsgfd;
+ int sigfd;
+ sigset_t mask;
+ int outpipe[2] = { -1, -1 };
+ int errpipe[2] = { -1, -1 };
+ char name[32];
+ pid_t child;
+ int result;
+
+ snprintf(name, sizeof(name), "%zd", idx);
+ mkdirat(resdirfd, name, 0777);
+ if ((dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY | O_CLOEXEC)) < 0) {
+ fprintf(stderr, "Error accessing individual test result directory\n");
+ return -1;
+ }
+
+ if (!open_output_files(dirfd, outputs, true)) {
+ close(dirfd);
+ fprintf(stderr, "Error opening output files\n");
+ return -1;
+ }
+
+ if (settings->sync) {
+ fsync(dirfd);
+ fsync(resdirfd);
+ }
+
+ if (pipe(outpipe) || pipe(errpipe)) {
+ close_outputs(outputs);
+ close(dirfd);
+ close(outpipe[0]);
+ close(outpipe[1]);
+ close(errpipe[0]);
+ close(errpipe[1]);
+ fprintf(stderr, "Error creating pipes: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if ((kmsgfd = open("/dev/kmsg", O_RDONLY | O_CLOEXEC)) < 0) {
+ fprintf(stderr, "Warning: Cannot open /dev/kmsg\n");
+ } else {
+ /* TODO: Checking of abort conditions in pre-execute dmesg */
+ lseek(kmsgfd, 0, SEEK_END);
+ }
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+ sigaddset(&mask, SIGINT);
+ sigaddset(&mask, SIGTERM);
+ sigaddset(&mask, SIGQUIT);
+ sigprocmask(SIG_BLOCK, &mask, NULL);
+ sigfd = signalfd(-1, &mask, O_CLOEXEC);
+
+ if (sigfd < 0) {
+ /* TODO: Handle better */
+ fprintf(stderr, "Cannot monitor child process with signalfd\n");
+ close(outpipe[0]);
+ close(errpipe[0]);
+ close(outpipe[1]);
+ close(errpipe[1]);
+ close(kmsgfd);
+ close_outputs(outputs);
+ close(dirfd);
+ return -1;
+ }
+
+ if (settings->log_level >= LOG_LEVEL_NORMAL) {
+ int width = digits(total);
+ printf("[%0*zd/%0*zd] %s", width, idx + 1, width, total, entry->binary);
+ if (entry->subtest_count > 0) {
+ size_t i;
+ const char *delim = "";
+
+ printf(" (");
+ for (i = 0; i < entry->subtest_count; i++) {
+ printf("%s%s", delim, entry->subtests[i]);
+ delim = ", ";
+ }
+ printf(")");
+ }
+ printf("\n");
+ }
+
+ if ((child = fork())) {
+ int outfd = outpipe[0];
+ int errfd = errpipe[0];
+ close(outpipe[1]);
+ close(errpipe[1]);
+
+ result = monitor_output(child, outfd, errfd, kmsgfd, sigfd,
+ outputs, settings);
+ } else {
+ int outfd = outpipe[1];
+ int errfd = errpipe[1];
+ close(outpipe[0]);
+ close(errpipe[0]);
+
+ sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+ setenv("IGT_SENTINEL_ON_STDERR", "1", 1);
+
+ execute_test_process(outfd, errfd, settings, entry);
+ }
+
+ /* TODO: Refactor this whole function to use onion teardown */
+ close(outpipe[1]);
+ close(errpipe[1]);
+ close(kmsgfd);
+ close_outputs(outputs);
+ close(dirfd);
+
+ return result;
+}
+
+static int remove_file(int dirfd, const char *name)
+{
+ return unlinkat(dirfd, name, 0) && errno != ENOENT;
+}
+
+static bool clear_test_result_directory(int dirfd)
+{
+ int i;
+
+ for (i = 0; i < _F_LAST; i++) {
+ if (remove_file(dirfd, filenames[i])) {
+ fprintf(stderr, "Error deleting %s from test result directory: %s\n",
+ filenames[i],
+ strerror(errno));
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool clear_old_results(char *path)
+{
+ int dirfd;
+ size_t i;
+
+ if ((dirfd = open(path, O_DIRECTORY | O_RDONLY)) < 0) {
+ if (errno == ENOENT) {
+ /* Successfully cleared if it doesn't even exist */
+ return true;
+ }
+
+ fprintf(stderr, "Error clearing old results: %s\n", strerror(errno));
+ return false;
+ }
+
+ if (unlinkat(dirfd, "uname.txt", 0) && errno != ENOENT) {
+ close(dirfd);
+ fprintf(stderr, "Error clearing old results: %s\n", strerror(errno));
+ return false;
+ }
+
+ for (i = 0; true; i++) {
+ char name[32];
+ int resdirfd;
+
+ snprintf(name, sizeof(name), "%zd", i);
+ if ((resdirfd = openat(dirfd, name, O_DIRECTORY | O_RDONLY)) < 0)
+ break;
+
+ if (!clear_test_result_directory(resdirfd)) {
+ close(resdirfd);
+ close(dirfd);
+ return false;
+ }
+ close(resdirfd);
+ if (unlinkat(dirfd, name, AT_REMOVEDIR)) {
+ fprintf(stderr,
+ "Warning: Result directory %s contains extra files\n",
+ name);
+ }
+ }
+
+ close(dirfd);
+
+ return true;
+}
+
+bool initialize_execute_state_from_resume(int dirfd,
+ struct execute_state *state,
+ struct settings *settings,
+ struct job_list *list)
+{
+ struct job_list_entry *entry;
+ int resdirfd, fd, i;
+
+ free_settings(settings);
+ free_job_list(list);
+ memset(state, 0, sizeof(*state));
+
+ if (!read_settings(settings, dirfd) ||
+ !read_job_list(list, dirfd)) {
+ close(dirfd);
+ return false;
+ }
+
+ for (i = list->size; i >= 0; i--) {
+ char name[32];
+
+ snprintf(name, sizeof(name), "%d", i);
+ if ((resdirfd = openat(dirfd, name, O_DIRECTORY | O_RDONLY)) >= 0)
+ break;
+ }
+
+ if (i < 0)
+ /* Nothing has been executed yet, state is fine as is */
+ goto success;
+
+ entry = &list->entries[i];
+ state->next = i;
+ if ((fd = openat(resdirfd, filenames[_F_JOURNAL], O_RDONLY)) >= 0) {
+ if (!prune_from_journal(entry, fd)) {
+ /*
+ * The test does not have subtests, or
+ * incompleted before the first subtest
+ * began. Either way, not suitable to
+ * re-run.
+ */
+ state->next = i + 1;
+ } else if (entry->binary[0] == '\0') {
+ /* This test is fully completed */
+ state->next = i + 1;
+ }
+
+ close(fd);
+ }
+
+ success:
+ close(resdirfd);
+ close(dirfd);
+
+ return true;
+}
+
+bool initialize_execute_state(struct execute_state *state,
+ struct settings *settings,
+ struct job_list *job_list)
+{
+ memset(state, 0, sizeof(*state));
+
+ if (!validate_settings(settings))
+ return false;
+
+ if (!serialize_settings(settings) ||
+ !serialize_job_list(job_list, settings))
+ return false;
+
+ if (settings->overwrite &&
+ !clear_old_results(settings->results_path))
+ return false;
+
+ return true;
+}
+
+bool execute(struct execute_state *state,
+ struct settings *settings,
+ struct job_list *job_list)
+{
+ struct utsname unamebuf;
+ int resdirfd, testdirfd, unamefd;
+
+ if ((resdirfd = open(settings->results_path, O_DIRECTORY | O_RDONLY)) < 0) {
+ /* Initialize state should have done this */
+ fprintf(stderr, "Error: Failure opening results path %s\n",
+ settings->results_path);
+ return false;
+ }
+
+ if ((testdirfd = open(settings->test_root, O_DIRECTORY | O_RDONLY)) < 0) {
+ fprintf(stderr, "Error: Failure opening test root %s\n",
+ settings->test_root);
+ close(resdirfd);
+ return false;
+ }
+
+ /* TODO: On resume, don't rewrite, verify that content matches current instead */
+ if ((unamefd = openat(resdirfd, "uname.txt", O_CREAT | O_WRONLY | O_TRUNC, 0666)) < 0) {
+ fprintf(stderr, "Error: Failure creating opening uname.txt: %s\n",
+ strerror(errno));
+ close(testdirfd);
+ close(resdirfd);
+ return false;
+ }
+
+ init_watchdogs(settings);
+
+ if (!uname(&unamebuf)) {
+ dprintf(unamefd, "%s %s %s %s %s\n",
+ unamebuf.sysname,
+ unamebuf.nodename,
+ unamebuf.release,
+ unamebuf.version,
+ unamebuf.machine);
+ } else {
+ dprintf(unamefd, "uname() failed\n");
+ }
+ close(unamefd);
+
+ for (; state->next < job_list->size;
+ state->next++) {
+ int result = execute_entry(state->next,
+ job_list->size,
+ settings,
+ &job_list->entries[state->next],
+ testdirfd, resdirfd);
+ if (result != 0) {
+ close(testdirfd);
+ close_watchdogs(settings);
+ if (result > 0) {
+ initialize_execute_state_from_resume(resdirfd, state, settings, job_list);
+ return execute(state, settings, job_list);
+ }
+ close(resdirfd);
+ return false;
+ }
+ }
+
+ close(testdirfd);
+ close(resdirfd);
+ close_watchdogs(settings);
+ return true;
+}