diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2020-12-24 13:55:36 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2020-12-24 15:02:35 +0000 |
commit | 16f2941ad3078cac9c9ad69f3205fe4619f49edf (patch) | |
tree | 48a0e0eb62b5a118f9b7fcba36e74fcf1fb0bb24 /drivers/gpu/drm/i915/gt/intel_reset.c | |
parent | 6d393ef5ff5cac48b44781b1c1c22aabd65eba27 (diff) |
drm/i915/gt: Replace direct submit with direct call to tasklet
Rather than having special case code for opportunistically calling
process_csb() and performing a direct submit while holding the engine
spinlock for submitting the request, simply call the tasklet directly.
This allows us to retain the direct submission path, including the CS
draining to allow fast/immediate submissions, without requiring any
duplicated code paths, and most importantly greatly simplifying the
control flow by removing reentrancy. This will enable us to close a few
races in the virtual engines in the next few patches.
The trickiest part here is to ensure that paired operations (such as
schedule_in/schedule_out) remain under consistent locking domains,
e.g. when pulled outside of the engine->active.lock
v2: Use bh kicking, see commit 3c53776e29f8 ("Mark HI and TASKLET
softirq synchronous").
v3: Update engine-reset to be tasklet aware
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201224135544.1713-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_reset.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_reset.c | 60 |
1 files changed, 38 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b3ccf7859ab1..7583f16c293c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -40,20 +40,19 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) intel_uncore_rmw_fw(uncore, reg, clr, 0); } -static void engine_skip_context(struct i915_request *rq) +static void skip_context(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; struct intel_context *hung_ctx = rq->context; - if (!i915_request_is_active(rq)) - return; + list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) { + if (!i915_request_is_active(rq)) + return; - lockdep_assert_held(&engine->active.lock); - list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->context == hung_ctx) { i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); } + } } static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) @@ -160,7 +159,7 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); if (mark_guilty(rq)) - engine_skip_context(rq); + skip_context(rq); } else { i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); @@ -753,8 +752,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) if (err) return err; + local_bh_disable(); for_each_engine(engine, gt, id) __intel_engine_reset(engine, stalled_mask & engine->mask); + local_bh_enable(); intel_ggtt_restore_fences(gt->ggtt); @@ -832,9 +833,11 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ + local_bh_disable(); for_each_engine(engine, gt, id) if (engine->reset.cancel) engine->reset.cancel(engine); + local_bh_enable(); reset_finish(gt, awake); @@ -1109,20 +1112,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) return __intel_gt_reset(engine->gt, engine->mask); } -/** - * intel_engine_reset - reset GPU engine to recover from a hang - * @engine: engine to reset - * @msg: reason for GPU reset; or NULL for no drm_notice() - * - * Reset a specific GPU engine. Useful if a hang is detected. - * Returns zero on successful reset or otherwise an error code. - * - * Procedure is: - * - identifies the request that caused the hang and it is dropped - * - reset engine (which will force the engine to idle) - * - re-init/configure engine - */ -int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) +int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; bool uses_guc = intel_engine_in_guc_submission_mode(engine); @@ -1172,6 +1162,30 @@ out: return ret; } +/** + * intel_engine_reset - reset GPU engine to recover from a hang + * @engine: engine to reset + * @msg: reason for GPU reset; or NULL for no drm_notice() + * + * Reset a specific GPU engine. Useful if a hang is detected. + * Returns zero on successful reset or otherwise an error code. + * + * Procedure is: + * - identifies the request that caused the hang and it is dropped + * - reset engine (which will force the engine to idle) + * - re-init/configure engine + */ +int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) +{ + int err; + + local_bh_disable(); + err = __intel_engine_reset_bh(engine, msg); + local_bh_enable(); + + return err; +} + static void intel_gt_reset_global(struct intel_gt *gt, u32 engine_mask, const char *reason) @@ -1258,18 +1272,20 @@ void intel_gt_handle_error(struct intel_gt *gt, * single reset fails. */ if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) continue; - if (intel_engine_reset(engine, msg) == 0) + if (__intel_engine_reset_bh(engine, msg) == 0) engine_mask &= ~engine->mask; clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); } + local_bh_enable(); } if (!engine_mask) |