summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gem
diff options
context:
space:
mode:
authorTvrtko Ursulin <tvrtko.ursulin@intel.com>2022-05-27 08:24:52 +0100
committerTvrtko Ursulin <tvrtko.ursulin@intel.com>2022-06-17 09:03:11 +0100
commit45c64ecf97ee370bbdbd8eed7aed9c8ff5d1b0dd (patch)
tree1dd23d2db2a6b933895b9b6eef7acd233d8b1240 /drivers/gpu/drm/i915/gem
parent1556c3b4c7ed2c8f17f200d53897251fc68b7377 (diff)
drm/i915: Improve user experience and driver robustness under SIGINT or similar
We have long standing customer complaints that pressing Ctrl-C (or to the effect of) causes engine resets with otherwise well behaving programs. Not only is logging engine resets during normal operation not desirable since it creates support incidents, but more fundamentally we should avoid going the engine reset path when we can since any engine reset introduces a chance of harming an innocent context. Reason for this undesirable behaviour is that the driver currently does not distinguish between banned contexts and non-persistent contexts which have been closed. To fix this we add the distinction between the two reasons for revoking contexts, which then allows the strict timeout only be applied to banned, while innocent contexts (well behaving) can preempt cleanly and exit without triggering the engine reset path. Note that the added context exiting category applies both to closed non- persistent context, and any exiting context when hangcheck has been disabled by the user. At the same time we rename the backend operation from 'ban' to 'revoke' which more accurately describes the actual semantics. (There is no ban at the backend level since banning is a concept driven by the scheduling frontend. Backends are simply able to revoke a running context so that is the more appropriate name chosen.) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220527072452.2225610-1-tvrtko.ursulin@linux.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c23
1 files changed, 15 insertions, 8 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a3bb73f5d53b..e116f82fc37c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1367,7 +1367,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
return engine;
}
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool exit, bool persistent)
{
struct i915_gem_engines_iter it;
struct intel_context *ce;
@@ -1381,9 +1382,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
*/
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
+ bool skip = false;
- if (ban && intel_context_ban(ce, NULL))
- continue;
+ if (exit)
+ skip = intel_context_set_exiting(ce);
+ else if (!persistent)
+ skip = intel_context_exit_nonpersistent(ce, NULL);
+
+ if (skip)
+ continue; /* Already marked. */
/*
* Check the current active state of this context; if we
@@ -1395,7 +1402,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
engine = active_engine(ce);
/* First attempt to gracefully cancel the context */
- if (engine && !__cancel_engine(engine) && ban)
+ if (engine && !__cancel_engine(engine) && (exit || !persistent))
/*
* If we are unable to send a preemptive pulse to bump
* the context from the GPU, we have to resort to a full
@@ -1407,8 +1414,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
static void kill_context(struct i915_gem_context *ctx)
{
- bool ban = (!i915_gem_context_is_persistent(ctx) ||
- !ctx->i915->params.enable_hangcheck);
struct i915_gem_engines *pos, *next;
spin_lock_irq(&ctx->stale.lock);
@@ -1421,7 +1426,8 @@ static void kill_context(struct i915_gem_context *ctx)
spin_unlock_irq(&ctx->stale.lock);
- kill_engines(pos, ban);
+ kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+ i915_gem_context_is_persistent(ctx));
spin_lock_irq(&ctx->stale.lock);
GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -1467,7 +1473,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
kill:
if (list_empty(&engines->link)) /* raced, already closed */
- kill_engines(engines, true);
+ kill_engines(engines, true,
+ i915_gem_context_is_persistent(ctx));
i915_sw_fence_commit(&engines->fence);
}