summaryrefslogtreecommitdiff
path: root/runner/executor.c
diff options
context:
space:
mode:
authorPetri Latvala <petri.latvala@intel.com>2019-09-09 14:38:07 +0300
committerPetri Latvala <petri.latvala@intel.com>2019-09-17 13:02:42 +0300
commitddfde25f16ba31fb480d2e83b29631aaa56526cb (patch)
tree041e5c76b531b09bd325ba94bb3eabac0136b21a /runner/executor.c
parentbc11519d627da5a0dd02c43f2912df693dba4cac (diff)
runner: Add support for aborting on network failure
If the network goes down while testing, CI tends to interpret that as the device being down, cutting its power after a while. This causes an innocent test to be reported as incomplete, increasing noise in the results. A new flag to --abort-on-monitored-error, "ping", uses liboping to ping a host configured in .igtrc with one ping after each test execution and aborts the run if there is no reply in a hardcoded amount of time. v2: - Use a higher timeout - Allow hostname configuration from environment v3: - Use runner_c_args for holding c args for runner - Handle runner's meson options in runner/meson.build - Instead of one ping with 20 second timeout, ping with 1 second timeout for a duration of 20 seconds v4: - Rebase - Use now-exported igt_load_igtrc instead of copypaste code - Use define for timeout, clearer var name for single attempt timeout Signed-off-by: Petri Latvala <petri.latvala@intel.com> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com> Cc: Martin Peres <martin.peres@linux.intel.com> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com> Cc: Daniel Vetter <daniel@ffwll.ch> Reviewed-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Diffstat (limited to 'runner/executor.c')
-rw-r--r--runner/executor.c132
1 files changed, 132 insertions, 0 deletions
diff --git a/runner/executor.c b/runner/executor.c
index 4846684e..c1cfcce8 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,6 +1,10 @@
#include <errno.h>
#include <fcntl.h>
+#include <glib.h>
#include <linux/watchdog.h>
+#if HAVE_OPING
+#include <oping.h>
+#endif
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
@@ -18,6 +22,7 @@
#include <time.h>
#include <unistd.h>
+#include "igt_aux.h"
#include "igt_core.h"
#include "executor.h"
#include "output_strings.h"
@@ -147,6 +152,129 @@ static void ping_watchdogs(void)
}
}
+#if HAVE_OPING
+static pingobj_t *pingobj = NULL; /* liboping handle; stays NULL until ping_config() has a host to ping */
+
+/*
+ * Add the ping target named by the "PingHostName" key in the "DUT" group
+ * of the igt config file to pingobj.
+ *
+ * Returns true if a hostname was found and accepted by ping_host_add(),
+ * false if the config file could not be loaded, the key is absent, or
+ * the hostname was rejected (the latter is also reported on stderr).
+ */
+static bool load_ping_config_from_file(void)
+{
+	GError *error = NULL;
+	GKeyFile *key_file;
+	char *ping_hostname;
+	bool added;
+
+	/* Load igt config file */
+	key_file = igt_load_igtrc();
+	if (!key_file)
+		return false;
+
+	ping_hostname =
+		g_key_file_get_string(key_file, "DUT",
+				      "PingHostName", &error);
+
+	/* A lookup failure is signalled by the NULL return; drop the GError. */
+	g_clear_error(&error);
+	g_key_file_free(key_file);
+
+	if (!ping_hostname)
+		return false;
+
+	/* ping_host_add() returns non-zero on failure. */
+	added = ping_host_add(pingobj, ping_hostname) == 0;
+	if (!added)
+		fprintf(stderr,
+			"abort on ping: Cannot use hostname from config file\n");
+
+	/*
+	 * g_key_file_get_string() hands back a newly allocated string owned
+	 * by the caller. liboping duplicates the name internally, so it can
+	 * be released as soon as ping_host_add() has returned.
+	 */
+	g_free(ping_hostname);
+
+	return added;
+}
+
+/*
+ * Add the ping target named by the IGT_PING_HOSTNAME environment
+ * variable to pingobj.
+ *
+ * Returns true if the variable is set and ping_host_add() accepted it,
+ * false otherwise. A rejected hostname is reported on stderr.
+ */
+static bool load_ping_config_from_env(void)
+{
+	const char *env_host = getenv("IGT_PING_HOSTNAME");
+
+	if (env_host == NULL)
+		return false;
+
+	/* ping_host_add() returns zero on success. */
+	if (ping_host_add(pingobj, env_host) == 0)
+		return true;
+
+	fprintf(stderr,
+		"abort on ping: Cannot use hostname from environment\n");
+	return false;
+}
+
+/*
+ * Total time, in seconds, that can_ping() keeps retrying before it
+ * gives up and reports the network as unreachable.
+ *
+ * On some hosts, getting network back up after suspend takes
+ * upwards of 10 seconds. 20 seconds should be enough to see
+ * if network comes back at all, and hopefully not too long to
+ * make external monitoring freak out.
+ */
+#define PING_ABORT_DEADLINE 20
+
+/*
+ * Ping every host registered in pingobj, retrying until one of them
+ * replies or PING_ABORT_DEADLINE expires.
+ *
+ * Returns true as soon as any host reports a non-negative latency,
+ * false if the deadline passes without a single reply.
+ */
+static bool can_ping(void)
+{
+	igt_until_timeout(PING_ABORT_DEADLINE) {
+		pingobj_iter_t *iter;
+
+		ping_send(pingobj);
+
+		for (iter = ping_iterator_get(pingobj);
+		     iter != NULL;
+		     iter = ping_iterator_next(iter)) {
+			/*
+			 * Start from "no reply" so that a failed query can
+			 * never be mistaken for a successful ping.
+			 */
+			double latency = -1.0;
+			size_t len = sizeof(latency);
+
+			/* Non-zero return: latency was not filled in. */
+			if (ping_iterator_get_info(iter,
+						   PING_INFO_LATENCY,
+						   &latency,
+						   &len) != 0)
+				continue;
+
+			if (latency >= 0.0)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+#endif
+
+/*
+ * One-time setup of the ping monitor. Does nothing when built without
+ * liboping support or when pingobj already exists.
+ *
+ * Creates the liboping object, registers the host to ping (environment
+ * first, then the igt config file) and sets the per-attempt timeout.
+ * If no host can be configured, the object is destroyed and pingobj is
+ * left NULL, which makes handle_ping() a no-op.
+ */
+static void ping_config(void)
+{
+#if HAVE_OPING
+	double single_attempt_timeout = 1.0;
+
+	if (pingobj)
+		return;
+
+	pingobj = ping_construct();
+	if (!pingobj) {
+		/* Don't hand a NULL object to the config loaders. */
+		fprintf(stderr,
+			"abort on ping: Cannot create ping object\n");
+		return;
+	}
+
+	/* Try env first, then config file */
+	if (!load_ping_config_from_env() && !load_ping_config_from_file()) {
+		fprintf(stderr,
+			"abort on ping: No host to ping configured\n");
+		ping_destroy(pingobj);
+		pingobj = NULL;
+		return;
+	}
+
+	/* Keep going on failure; liboping then uses its default timeout. */
+	if (ping_setopt(pingobj, PING_OPT_TIMEOUT, &single_attempt_timeout))
+		fprintf(stderr,
+			"abort on ping: Cannot set ping timeout\n");
+#endif
+}
+
+/*
+ * Abort handler for ABORT_PING.
+ *
+ * Returns a heap-allocated string describing the abort reason when a
+ * ping host is configured and does not answer within the deadline
+ * enforced by can_ping(). Returns NULL when the ping succeeded, when no
+ * host is configured, when built without liboping, or when the reason
+ * string could not be allocated.
+ * NOTE(review): the caller presumably frees the returned string - confirm.
+ */
+static char *handle_ping(void)
+{
+#if HAVE_OPING
+	if (pingobj && !can_ping()) {
+		char *reason;
+
+		/*
+		 * On failure asprintf() leaves reason unspecified; never
+		 * return that pointer to the caller.
+		 */
+		if (asprintf(&reason,
+			     "Ping host did not respond to ping, network down") < 0) {
+			fprintf(stderr,
+				"abort on ping: Cannot allocate abort reason\n");
+			return NULL;
+		}
+
+		return reason;
+	}
+#endif
+
+	return NULL;
+}
+
static char *handle_lockdep(void)
{
const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
@@ -236,6 +364,7 @@ static const struct {
} abort_handlers[] = {
{ ABORT_LOCKDEP, handle_lockdep },
{ ABORT_TAINT, handle_taint },
+ { ABORT_PING, handle_ping },
{ 0, 0 },
};
@@ -1361,6 +1490,9 @@ bool execute(struct execute_state *state,
init_watchdogs(settings);
+ if (settings->abort_mask & ABORT_PING)
+ ping_config();
+
if (!uname(&unamebuf)) {
dprintf(unamefd, "%s %s %s %s %s\n",
unamebuf.sysname,