summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_reset.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2020-12-24 13:55:36 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2020-12-24 15:02:35 +0000
commit16f2941ad3078cac9c9ad69f3205fe4619f49edf (patch)
tree48a0e0eb62b5a118f9b7fcba36e74fcf1fb0bb24 /drivers/gpu/drm/i915/gt/intel_reset.c
parent6d393ef5ff5cac48b44781b1c1c22aabd65eba27 (diff)
drm/i915/gt: Replace direct submit with direct call to tasklet
Rather than having special case code for opportunistically calling process_csb() and performing a direct submit while holding the engine spinlock for submitting the request, simply call the tasklet directly. This allows us to retain the direct submission path, including the CS draining to allow fast/immediate submissions, without requiring any duplicated code paths, and most importantly greatly simplifying the control flow by removing reentrancy. This will enable us to close a few races in the virtual engines in the next few patches. The trickiest part here is to ensure that paired operations (such as schedule_in/schedule_out) remain under consistent locking domains, e.g. when pulled outside of the engine->active.lock v2: Use bh kicking, see commit 3c53776e29f8 ("Mark HI and TASKLET softirq synchronous"). v3: Update engine-reset to be tasklet aware Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20201224135544.1713-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_reset.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c60
1 files changed, 38 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b3ccf7859ab1..7583f16c293c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -40,20 +40,19 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
intel_uncore_rmw_fw(uncore, reg, clr, 0);
}
-static void engine_skip_context(struct i915_request *rq)
+static void skip_context(struct i915_request *rq)
{
- struct intel_engine_cs *engine = rq->engine;
struct intel_context *hung_ctx = rq->context;
- if (!i915_request_is_active(rq))
- return;
+ list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) {
+ if (!i915_request_is_active(rq))
+ return;
- lockdep_assert_held(&engine->active.lock);
- list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
if (rq->context == hung_ctx) {
i915_request_set_error_once(rq, -EIO);
__i915_request_skip(rq);
}
+ }
}
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -160,7 +159,7 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
i915_request_set_error_once(rq, -EIO);
__i915_request_skip(rq);
if (mark_guilty(rq))
- engine_skip_context(rq);
+ skip_context(rq);
} else {
i915_request_set_error_once(rq, -EAGAIN);
mark_innocent(rq);
@@ -753,8 +752,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
if (err)
return err;
+ local_bh_disable();
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, stalled_mask & engine->mask);
+ local_bh_enable();
intel_ggtt_restore_fences(gt->ggtt);
@@ -832,9 +833,11 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
set_bit(I915_WEDGED, &gt->reset.flags);
/* Mark all executing requests as skipped */
+ local_bh_disable();
for_each_engine(engine, gt, id)
if (engine->reset.cancel)
engine->reset.cancel(engine);
+ local_bh_enable();
reset_finish(gt, awake);
@@ -1109,20 +1112,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
return __intel_gt_reset(engine->gt, engine->mask);
}
-/**
- * intel_engine_reset - reset GPU engine to recover from a hang
- * @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no drm_notice()
- *
- * Reset a specific GPU engine. Useful if a hang is detected.
- * Returns zero on successful reset or otherwise an error code.
- *
- * Procedure is:
- * - identifies the request that caused the hang and it is dropped
- * - reset engine (which will force the engine to idle)
- * - re-init/configure engine
- */
-int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
+int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
{
struct intel_gt *gt = engine->gt;
bool uses_guc = intel_engine_in_guc_submission_mode(engine);
@@ -1172,6 +1162,30 @@ out:
return ret;
}
+/**
+ * intel_engine_reset - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ * @msg: reason for GPU reset; or NULL for no drm_notice()
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected.
+ * Returns zero on successful reset or otherwise an error code.
+ *
+ * Procedure is:
+ * - identifies the request that caused the hang and it is dropped
+ * - reset engine (which will force the engine to idle)
+ * - re-init/configure engine
+ */
+int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
+{
+ int err;
+
+ local_bh_disable();
+ err = __intel_engine_reset_bh(engine, msg);
+ local_bh_enable();
+
+ return err;
+}
+
static void intel_gt_reset_global(struct intel_gt *gt,
u32 engine_mask,
const char *reason)
@@ -1258,18 +1272,20 @@ void intel_gt_handle_error(struct intel_gt *gt,
* single reset fails.
*/
if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
+ local_bh_disable();
for_each_engine_masked(engine, gt, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
continue;
- if (intel_engine_reset(engine, msg) == 0)
+ if (__intel_engine_reset_bh(engine, msg) == 0)
engine_mask &= ~engine->mask;
clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags);
}
+ local_bh_enable();
}
if (!engine_mask)