Diffstat (limited to 'drivers/gpu/drm/i915/gem')
21 files changed, 682 insertions, 157 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 321af109d484..dabdfe09f5e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1368,7 +1368,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool exit, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1382,9 +1383,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 */
 	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
+		bool skip = false;
 
-		if (ban && intel_context_ban(ce, NULL))
-			continue;
+		if (exit)
+			skip = intel_context_set_exiting(ce);
+		else if (!persistent)
+			skip = intel_context_exit_nonpersistent(ce, NULL);
+
+		if (skip)
+			continue; /* Already marked. */
 
 		/*
 		 * Check the current active state of this context; if we
@@ -1396,7 +1403,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (exit || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -1408,8 +1415,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -1422,7 +1427,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -1468,7 +1474,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
@@ -1876,6 +1883,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 {
 	const struct sseu_dev_info *device = &gt->info.sseu;
 	struct drm_i915_private *i915 = gt->i915;
+	unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 0);
 
 	/* No zeros in any field. */
 	if (!user->slice_mask || !user->subslice_mask ||
@@ -1902,7 +1910,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 	if (user->slice_mask & ~device->slice_mask)
 		return -EINVAL;
 
-	if (user->subslice_mask & ~device->subslice_mask[0])
+	if (user->subslice_mask & ~dev_subslice_mask)
 		return -EINVAL;
 
 	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
@@ -1916,7 +1924,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 	/* Part specific restrictions. */
 	if (GRAPHICS_VER(i915) == 11) {
 		unsigned int hw_s = hweight8(device->slice_mask);
-		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int hw_ss_per_s = hweight8(dev_subslice_mask);
 		unsigned int req_s = hweight8(context->slice_mask);
 		unsigned int req_ss = hweight8(context->subslice_mask);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 5802692ea604..33673fe7ee0a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -241,6 +241,7 @@ struct create_ext {
 	struct drm_i915_private *i915;
 	struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
 	unsigned int n_placements;
+	unsigned int placement_mask;
 	unsigned long flags;
 };
 
@@ -337,6 +338,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
 	for (i = 0; i < args->num_regions; i++)
 		ext_data->placements[i] = placements[i];
 
+	ext_data->placement_mask = mask;
 	return 0;
 
 out_dump:
@@ -411,7 +413,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	if (args->flags)
+	if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
 		return -EINVAL;
 
 	ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
@@ -427,6 +429,22 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
 		ext_data.n_placements = 1;
 	}
 
+	if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
+		if (ext_data.n_placements == 1)
+			return -EINVAL;
+
+		/*
+		 * We always need to be able to spill to system memory, if we
+		 * can't place in the mappable part of LMEM.
+		 */
+		if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
+			return -EINVAL;
+	} else {
+		if (ext_data.n_placements > 1 ||
+		    ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)
+			ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;
+	}
+
 	obj = __i915_gem_object_create_user_ext(i915, args->size,
 						ext_data.placements,
 						ext_data.n_placements,
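The NEEDS_CPU_ACCESS validation above means userspace asking for a CPU-visible buffer must list system memory among the placements, so there is always somewhere mappable to spill to. For context, a create call that satisfies the new checks might look roughly like this from userspace (a sketch against the uapi additions that accompany this series; the fd and memory_instance values are placeholders):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Ask for a buffer that may live in LMEM but must stay CPU-accessible. */
	static int create_cpu_visible_bo(int fd, uint64_t size, uint32_t *handle)
	{
		struct drm_i915_gem_memory_class_instance regions[] = {
			{ .memory_class = I915_MEMORY_CLASS_DEVICE, .memory_instance = 0 },
			{ .memory_class = I915_MEMORY_CLASS_SYSTEM, .memory_instance = 0 },
		};
		struct drm_i915_gem_create_ext_memory_regions ext = {
			.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
			.num_regions = 2,
			.regions = (uintptr_t)regions,
		};
		struct drm_i915_gem_create_ext create = {
			.size = size,
			.flags = I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS,
			.extensions = (uintptr_t)&ext,
		};

		if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
			return -1;
		*handle = create.handle;
		return 0;
	}

Omitting the flag on a multi-placement or LMEM-only object takes the other branch above and marks the allocation I915_BO_ALLOC_GPU_ONLY.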
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 30fe847c6664..b7b2c14fd9e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1951,7 +1951,7 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
 /* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
-static void eb_capture_stage(struct i915_execbuffer *eb)
+static int eb_capture_stage(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
 	unsigned int i = count, j;
@@ -1964,6 +1964,10 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 		if (!(flags & EXEC_OBJECT_CAPTURE))
 			continue;
 
+		if (i915_gem_context_is_recoverable(eb->gem_context) &&
+		    (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0)))
+			return -EINVAL;
+
 		for_each_batch_create_order(eb, j) {
 			struct i915_capture_list *capture;
 
@@ -1976,6 +1980,8 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 			eb->capture_lists[j] = capture;
 		}
 	}
+
+	return 0;
 }
 
 /* Commit once we're in the critical path */
@@ -2017,8 +2023,9 @@ static void eb_capture_list_clear(struct i915_execbuffer *eb)
 
 #else
 
-static void eb_capture_stage(struct i915_execbuffer *eb)
+static int eb_capture_stage(struct i915_execbuffer *eb)
 {
+	return 0;
 }
 
 static void eb_capture_commit(struct i915_execbuffer *eb)
@@ -3410,7 +3417,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	}
 
 	ww_acquire_done(&eb.ww.ctx);
-	eb_capture_stage(&eb);
+	err = eb_capture_stage(&eb);
+	if (err)
+		goto err_vma;
 
 	out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
 	if (IS_ERR(out_fence)) {
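With this change, EXEC_OBJECT_CAPTURE on discrete (and newer-than-12.0) parts is rejected for recoverable contexts, so userspace wanting error capture is expected to opt the context out of recoverability first. A sketch using the long-standing context setparam uapi (ctx_id is a placeholder for a context created by the caller):

	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Mark the context non-recoverable so EXEC_OBJECT_CAPTURE is allowed. */
	static int context_set_unrecoverable(int fd, uint32_t ctx_id)
	{
		struct drm_i915_gem_context_param p = {
			.ctx_id = ctx_id,
			.param = I915_CONTEXT_PARAM_RECOVERABLE,
			.value = 0,
		};

		return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
	}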
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 06b1b188ce5a..ccec4055fde3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -717,6 +717,32 @@ bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
 	return false;
 }
 
+/**
+ * i915_gem_object_needs_ccs_pages - Check whether the object requires extra
+ * pages when placed in system-memory, in order to save and later restore the
+ * flat-CCS aux state when the object is moved between local-memory and
+ * system-memory
+ * @obj: Pointer to the object
+ *
+ * Return: True if the object needs extra ccs pages. False otherwise.
+ */
+bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
+{
+	bool lmem_placement = false;
+	int i;
+
+	for (i = 0; i < obj->mm.n_placements; i++) {
+		/* Compression is not allowed for the objects with smem placement */
+		if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM)
+			return false;
+		if (!lmem_placement &&
+		    obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
+			lmem_placement = true;
+	}
+
+	return lmem_placement;
+}
+
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
 	INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
@@ -783,10 +809,31 @@ int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
 				    intr, MAX_SCHEDULE_TIMEOUT);
 	if (!ret)
 		ret = -ETIME;
+	else if (ret > 0 && i915_gem_object_has_unknown_state(obj))
+		ret = -EIO;
 
 	return ret < 0 ? ret : 0;
 }
 
+/**
+ * i915_gem_object_has_unknown_state - Return true if the object backing pages are
+ * in an unknown_state. This means that userspace must NEVER be allowed to touch
+ * the pages, with either the GPU or CPU.
+ *
+ * ONLY valid to be called after ensuring that all kernel fences have signalled
+ * (in particular the fence for moving/clearing the object).
+ */
+bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * The below barrier pairs with the dma_fence_signal() in
+	 * __memcpy_work(). We should only sample the unknown_state after all
+	 * the kernel fences have signalled.
+	 */
+	smp_rmb();
+	return obj->mm.unknown_state;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/huge_gem_object.c"
 #include "selftests/huge_pages.c"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index e11d82a9f7c3..6f0a3ce35567 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -524,6 +524,7 @@ int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
 				     struct dma_fence **fence);
 int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
 				      bool intr);
+bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj);
 
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
@@ -617,6 +618,8 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
 bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
 					enum intel_memory_type type);
 
+bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj);
+
 int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
 			 size_t size, struct intel_memory_region *mr,
 			 struct address_space *mapping,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2c88bdb8ff7c..5cf36a130061 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -548,6 +548,24 @@ struct drm_i915_gem_object {
 		bool ttm_shrinkable;
 
 		/**
+		 * @unknown_state: Indicate that the object is effectively
+		 * borked. This is write-once and set if we somehow encounter a
+		 * fatal error when moving/clearing the pages, and we are not
+		 * able to fallback to memcpy/memset, like on small-BAR systems.
+		 * The GPU should also be wedged (or in the process) at this
+		 * point.
+		 *
+		 * Only valid to read this after acquiring the dma-resv lock and
+		 * waiting for all DMA_RESV_USAGE_KERNEL fences to be signalled,
+		 * or if we otherwise know that the moving fence has signalled,
+		 * and we are certain the pages underneath are valid for
+		 * immediate access (under normal operation), like just prior to
+		 * binding the object or when setting up the CPU fault handler.
+		 * See i915_gem_object_has_unknown_state();
+		 */
+		bool unknown_state;
+
+		/**
 		 * Priority list of potential placements for this object.
 		 */
 		struct intel_memory_region **placements;
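The ordering contract around unknown_state is the classic publish/consume pairing: the writer stores the flag and then signals the fence (the signal acts as the write barrier), while a reader only samples the flag after observing the signalled fence, with an smp_rmb() in between. Condensed to its essentials (a sketch of the pattern, not literal driver code):

	/* Writer, in the failed-move worker: */
	obj->mm.unknown_state = true;	/* plain store ... */
	dma_fence_signal(&work->fence);	/* ... published by the fence signal */

	/* Reader, only after all kernel fences have signalled: */
	smp_rmb();			/* pairs with the signal above */
	if (obj->mm.unknown_state)
		return -EIO;		/* never touch the pages again */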
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 2e16e91a5a56..4eed3dd90ba8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -670,17 +670,10 @@ fail:
 
 static int init_shmem(struct intel_memory_region *mem)
 {
-	int err;
-
-	err = i915_gemfs_init(mem->i915);
-	if (err) {
-		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n",
-			 err);
-	}
-
+	i915_gemfs_init(mem->i915);
 	intel_memory_region_set_name(mem, "system");
 
-	return 0; /* Don't error, we can simply fallback to the kernel mnt */
+	return 0; /* We have fallback to the kernel mnt if gemfs init failed. */
 }
 
 static int release_shmem(struct intel_memory_region *mem)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 6a6ff98a8746..1030053571a2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -36,7 +36,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
 }
 
-static int drop_pages(struct drm_i915_gem_object *obj,
+static bool drop_pages(struct drm_i915_gem_object *obj,
 		       unsigned long shrink, bool trylock_vm)
 {
 	unsigned long flags;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 47b5e0e342ab..166d0a4b9e8c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -13,6 +13,8 @@
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
+#include "gt/intel_gt_regs.h"
 #include "gt/intel_region_lmem.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
@@ -834,8 +836,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 	} else {
 		resource_size_t lmem_range;
 
-		lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF;
-		lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
+		lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
+		lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
 		lmem_size *= SZ_1G;
 	}
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 80ac0db1ae8c..85518b28cd72 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -114,7 +114,7 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
 	return i915_gem_fence_size(i915, size, tiling, stride);
 }
 
-/* Check pitch constriants for all chips & tiling formats */
+/* Check pitch constraints for all chips & tiling formats */
 static bool
 i915_tiling_ok(struct drm_i915_gem_object *obj,
 	       unsigned int tiling, unsigned int stride)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 8f1bb6a4b7d1..f131dc065f47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -266,24 +266,6 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = {
 	.release = i915_ttm_tt_release
 };
 
-static inline bool
-i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
-{
-	bool lmem_placement = false;
-	int i;
-
-	for (i = 0; i < obj->mm.n_placements; i++) {
-		/* Compression is not allowed for the objects with smem placement */
-		if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM)
-			return false;
-		if (!lmem_placement &&
-		    obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
-			lmem_placement = true;
-	}
-
-	return lmem_placement;
-}
-
 static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 					 uint32_t page_flags)
 {
@@ -682,7 +664,15 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
 		i915_ttm_purge(obj);
 }
 
-static bool i915_ttm_resource_mappable(struct ttm_resource *res)
+/**
+ * i915_ttm_resource_mappable - Return true if the ttm resource is CPU
+ * accessible.
+ * @res: The TTM resource to check.
+ *
+ * This is interesting on small-BAR systems where we may encounter lmem objects
+ * that can't be accessed via the CPU.
+ */
+bool i915_ttm_resource_mappable(struct ttm_resource *res)
 {
 	struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
 
@@ -694,6 +684,22 @@ static bool i915_ttm_resource_mappable(struct ttm_resource *res)
 
 static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
 {
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo);
+	bool unknown_state;
+
+	if (!obj)
+		return -EINVAL;
+
+	if (!kref_get_unless_zero(&obj->base.refcount))
+		return -EINVAL;
+
+	assert_object_held(obj);
+
+	unknown_state = i915_gem_object_has_unknown_state(obj);
+	i915_gem_object_put(obj);
+	if (unknown_state)
+		return -EINVAL;
+
 	if (!i915_ttm_cpu_maps_iomem(mem))
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index 73e371aa3850..e4842b4296fc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -92,4 +92,7 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
 	/* Once / if we support GGTT, this is also false for cached ttm_tts */
 	return mem->mem_type != I915_PL_SYSTEM;
 }
+
+bool i915_ttm_resource_mappable(struct ttm_resource *res);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a10716f4e717..9a7e50534b84 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -33,6 +33,7 @@
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 static bool fail_gpu_migration;
 static bool fail_work_allocation;
+static bool ban_memcpy;
 
 void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
 					bool work_allocation)
@@ -40,6 +41,11 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
 	fail_gpu_migration = gpu_migration;
 	fail_work_allocation = work_allocation;
 }
+
+void i915_ttm_migrate_set_ban_memcpy(bool ban)
+{
+	ban_memcpy = ban;
+}
 #endif
 
 static enum i915_cache_level
@@ -258,15 +264,23 @@ struct i915_ttm_memcpy_arg {
  * from the callback for lockdep reasons.
  * @cb: Callback for the accelerated migration fence.
  * @arg: The argument for the memcpy functionality.
+ * @i915: The i915 pointer.
+ * @obj: The GEM object.
+ * @memcpy_allowed: Instead of processing the @arg, and falling back to memcpy
+ * or memset, we wedge the device and set the @obj unknown_state, to prevent
+ * further access to the object with the CPU or GPU.  On some devices we might
+ * only be permitted to use the blitter engine for such operations.
  */
 struct i915_ttm_memcpy_work {
 	struct dma_fence fence;
 	struct work_struct work;
-	/* The fence lock */
 	spinlock_t lock;
 	struct irq_work irq_work;
 	struct dma_fence_cb cb;
 	struct i915_ttm_memcpy_arg arg;
+	struct drm_i915_private *i915;
+	struct drm_i915_gem_object *obj;
+	bool memcpy_allowed;
 };
 
 static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
@@ -317,14 +331,42 @@ static void __memcpy_work(struct work_struct *work)
 	struct i915_ttm_memcpy_work *copy_work =
 		container_of(work, typeof(*copy_work), work);
 	struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
-	bool cookie = dma_fence_begin_signalling();
+	bool cookie;
+
+	/*
+	 * FIXME: We need to take a closer look here. We should be able to plonk
+	 * this into the fence critical section.
+	 */
+	if (!copy_work->memcpy_allowed) {
+		struct intel_gt *gt;
+		unsigned int id;
+
+		for_each_gt(gt, copy_work->i915, id)
+			intel_gt_set_wedged(gt);
+	}
+
+	cookie = dma_fence_begin_signalling();
+
+	if (copy_work->memcpy_allowed) {
+		i915_ttm_move_memcpy(arg);
+	} else {
+		/*
+		 * Prevent further use of the object. Any future GTT binding or
+		 * CPU access is not allowed once we signal the fence. Outside
+		 * of the fence critical section, we then also then wedge the gpu
+		 * to indicate the device is not functional.
+		 *
+		 * The below dma_fence_signal() is our write-memory-barrier.
+		 */
+		copy_work->obj->mm.unknown_state = true;
+	}
 
-	i915_ttm_move_memcpy(arg);
 	dma_fence_end_signalling(cookie);
 
 	dma_fence_signal(&copy_work->fence);
 
 	i915_ttm_memcpy_release(arg);
+	i915_gem_object_put(copy_work->obj);
 	dma_fence_put(&copy_work->fence);
 }
 
@@ -336,6 +378,7 @@ static void __memcpy_irq_work(struct irq_work *irq_work)
 
 	dma_fence_signal(&copy_work->fence);
 	i915_ttm_memcpy_release(arg);
+	i915_gem_object_put(copy_work->obj);
 	dma_fence_put(&copy_work->fence);
 }
 
@@ -389,6 +432,19 @@ i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
 	return &work->fence;
 }
 
+static bool i915_ttm_memcpy_allowed(struct ttm_buffer_object *bo,
+				    struct ttm_resource *dst_mem)
+{
+	if (i915_gem_object_needs_ccs_pages(i915_ttm_to_gem(bo)))
+		return false;
+
+	if (!(i915_ttm_resource_mappable(bo->resource) &&
+	      i915_ttm_resource_mappable(dst_mem)))
+		return false;
+
+	return I915_SELFTEST_ONLY(ban_memcpy) ? false : true;
+}
+
 static struct dma_fence *
 __i915_ttm_move(struct ttm_buffer_object *bo,
 		const struct ttm_operation_ctx *ctx, bool clear,
@@ -396,6 +452,9 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
 		struct i915_refct_sgt *dst_rsgt, bool allow_accel,
 		const struct i915_deps *move_deps)
 {
+	const bool memcpy_allowed = i915_ttm_memcpy_allowed(bo, dst_mem);
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	struct drm_i915_private *i915 = to_i915(bo->base.dev);
 	struct i915_ttm_memcpy_work *copy_work = NULL;
 	struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
 	struct dma_fence *fence = ERR_PTR(-EINVAL);
@@ -423,9 +482,14 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
 
 		copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
 		if (copy_work) {
+			copy_work->i915 = i915;
+			copy_work->memcpy_allowed = memcpy_allowed;
+			copy_work->obj = i915_gem_object_get(obj);
 			arg = &copy_work->arg;
-			i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
-					     dst_rsgt);
+			if (memcpy_allowed)
+				i915_ttm_memcpy_init(arg, bo, clear, dst_mem,
+						     dst_ttm, dst_rsgt);
+
 			fence = i915_ttm_memcpy_work_arm(copy_work, dep);
 		} else {
 			dma_fence_wait(dep, false);
@@ -450,17 +514,23 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
 	}
 
 	/* Error intercept failed or no accelerated migration to start with */
-	if (!copy_work)
-		i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
-				     dst_rsgt);
-	i915_ttm_move_memcpy(arg);
-	i915_ttm_memcpy_release(arg);
+
+	if (memcpy_allowed) {
+		if (!copy_work)
+			i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+					     dst_rsgt);
+		i915_ttm_move_memcpy(arg);
+		i915_ttm_memcpy_release(arg);
+	}
+	if (copy_work)
+		i915_gem_object_put(copy_work->obj);
 	kfree(copy_work);
 
-	return NULL;
+	return memcpy_allowed ? NULL : ERR_PTR(-EIO);
 out:
 	if (!fence && copy_work) {
 		i915_ttm_memcpy_release(arg);
+		i915_gem_object_put(copy_work->obj);
 		kfree(copy_work);
 	}
 
@@ -539,8 +609,11 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	}
 
 	if (migration_fence) {
-		ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
-						true, dst_mem);
+		if (I915_SELFTEST_ONLY(evict && fail_gpu_migration))
+			ret = -EIO; /* never feed non-migrate fences into ttm */
+		else
+			ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
+							true, dst_mem);
 		if (ret) {
 			dma_fence_wait(migration_fence, false);
 			ttm_bo_move_sync_cleanup(bo, dst_mem);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
index d2e7f149e05c..8a5d5ab0cc34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
@@ -22,6 +22,7 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo);
 
 I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
 							      bool work_allocation));
+I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_ban_memcpy(bool ban));
 
 int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 			  struct drm_i915_gem_object *src,
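Together with the existing failure-mode knobs, the new ban_memcpy switch lets a selftest force the no-fallback path and then assert on the -EIO plus wedged-device outcome. The recurring shape, which the migrate and mman selftests below adopt (a sketch; do_object_migrate is a hypothetical stand-in for the test's migration step):

	i915_ttm_migrate_set_ban_memcpy(true);
	i915_ttm_migrate_set_failure_modes(true, false);	/* fail the GPU copy */

	err = do_object_migrate(obj);		/* hypothetical helper */
	if (err != -EIO)			/* memcpy banned, so -EIO expected */
		err = -EINVAL;

	i915_ttm_migrate_set_failure_modes(false, false);
	i915_ttm_migrate_set_ban_memcpy(false);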
					      " when IOMMU is enabled!");
 }
 
 void i915_gemfs_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
index 2a1e59af3e4a..5d835e44c4f6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.h
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -9,8 +9,7 @@
 
 struct drm_i915_private;
 
-int i915_gemfs_init(struct drm_i915_private *i915);
-
+void i915_gemfs_init(struct drm_i915_private *i915);
 void i915_gemfs_fini(struct drm_i915_private *i915);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index ef15967be51a..72ce2c9f42fd 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1623,6 +1623,7 @@ static int igt_shrink_thp(void *arg)
 	struct file *file;
 	unsigned int flags = PIN_USER;
 	unsigned int n;
+	intel_wakeref_t wf;
 	bool should_swap;
 	int err;
 
@@ -1659,9 +1660,11 @@ static int igt_shrink_thp(void *arg)
 		goto out_put;
 	}
 
+	wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */
+
 	err = i915_vma_pin(vma, 0, 0, flags);
 	if (err)
-		goto out_put;
+		goto out_wf;
 
 	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
 		pr_info("failed to allocate THP, finishing test early\n");
@@ -1732,6 +1735,8 @@ static int igt_shrink_thp(void *arg)
 
 out_unpin:
 	i915_vma_unpin(vma);
+out_wf:
+	intel_runtime_pm_put(&i915->runtime_pm, wf);
 out_put:
 	i915_gem_object_put(obj);
 out_vm:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ddd0772fd828..3cfc621ef363 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -6,6 +6,7 @@
 #include "i915_selftest.h"
 
 #include "gt/intel_context.h"
+#include "gt/intel_engine_regs.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
@@ -18,10 +19,71 @@
 #include "huge_gem_object.h"
 #include "mock_context.h"
 
+#define OW_SIZE 16			/* in bytes */
+#define F_SUBTILE_SIZE 64		/* in bytes */
+#define F_TILE_WIDTH 128		/* in bytes */
+#define F_TILE_HEIGHT 32		/* in pixels */
+#define F_SUBTILE_WIDTH OW_SIZE		/* in bytes */
+#define F_SUBTILE_HEIGHT 4		/* in pixels */
+
+static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
+{
+	int tile_base;
+	int tile_x, tile_y;
+	int swizzle, subtile;
+	int pixel_size = bpp / 8;
+	int pos;
+
+	/*
+	 * Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
+	 * so we can use the same table to tile and until.
+	 */
+	static const u8 f_subtile_map[] = {
+		 0,  1,  2,  3,  8,  9, 10, 11,
+		 4,  5,  6,  7, 12, 13, 14, 15,
+		16, 17, 18, 19, 24, 25, 26, 27,
+		20, 21, 22, 23, 28, 29, 30, 31,
+		32, 33, 34, 35, 40, 41, 42, 43,
+		36, 37, 38, 39, 44, 45, 46, 47,
+		48, 49, 50, 51, 56, 57, 58, 59,
+		52, 53, 54, 55, 60, 61, 62, 63
+	};
+
+	x *= pixel_size;
+	/*
+	 * Where does the 4k tile start (in bytes)? This is the same for Y and
+	 * F so we can use the Y-tile algorithm to get to that point.
+	 */
+	tile_base =
+		y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
+		x / F_TILE_WIDTH * 4096;
+
+	/* Find pixel within tile */
+	tile_x = x % F_TILE_WIDTH;
+	tile_y = y % F_TILE_HEIGHT;
+
+	/* And figure out the subtile within the 4k tile */
+	subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
+
+	/* Swizzle the subtile number according to the bspec diagram */
+	swizzle = f_subtile_map[subtile];
+
+	/* Calculate new position */
+	pos = tile_base +
+	      swizzle * F_SUBTILE_SIZE +
+	      tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
+	      tile_x % F_SUBTILE_WIDTH;
+
+	GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
+
+	return pos / pixel_size * 4;
+}
+
 enum client_tiling {
 	CLIENT_TILING_LINEAR,
 	CLIENT_TILING_X,
 	CLIENT_TILING_Y,
+	CLIENT_TILING_4,
 	CLIENT_NUM_TILING_TYPES
 };
 
@@ -45,6 +107,36 @@ struct tiled_blits {
 	u32 height;
 };
 
+static bool supports_x_tiling(const struct drm_i915_private *i915)
+{
+	int gen = GRAPHICS_VER(i915);
+
+	if (gen < 12)
+		return true;
+
+	if (!HAS_LMEM(i915) || IS_DG1(i915))
+		return false;
+
+	return true;
+}
+
+static bool fast_blit_ok(const struct blit_buffer *buf)
+{
+	int gen = GRAPHICS_VER(buf->vma->vm->i915);
+
+	if (gen < 9)
+		return false;
+
+	if (gen < 12)
+		return true;
+
+	/* filter out platforms with unsupported X-tile support in fastblit */
+	if (buf->tiling == CLIENT_TILING_X && !supports_x_tiling(buf->vma->vm->i915))
+		return false;
+
+	return true;
+}
+
 static int prepare_blit(const struct tiled_blits *t,
 			struct blit_buffer *dst,
 			struct blit_buffer *src,
@@ -59,51 +151,103 @@ static int prepare_blit(const struct tiled_blits *t,
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
-	cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
-	if (src->tiling == CLIENT_TILING_Y)
-		cmd |= BCS_SRC_Y;
-	if (dst->tiling == CLIENT_TILING_Y)
-		cmd |= BCS_DST_Y;
-	*cs++ = cmd;
-
-	cmd = MI_FLUSH_DW;
-	if (ver >= 8)
-		cmd++;
-	*cs++ = cmd;
-	*cs++ = 0;
-	*cs++ = 0;
-	*cs++ = 0;
-
-	cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
-	if (ver >= 8)
-		cmd += 2;
-
-	src_pitch = t->width * 4;
-	if (src->tiling) {
-		cmd |= XY_SRC_COPY_BLT_SRC_TILED;
-		src_pitch /= 4;
-	}
+	if (fast_blit_ok(dst) && fast_blit_ok(src)) {
+		struct intel_gt *gt = t->ce->engine->gt;
+		u32 src_tiles = 0, dst_tiles = 0;
+		u32 src_4t = 0, dst_4t = 0;
+
+		/* Need to program BLIT_CCTL if it is not done previously
+		 * before using XY_FAST_COPY_BLT
+		 */
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(BLIT_CCTL(t->ce->engine->mmio_base));
+		*cs++ = (BLIT_CCTL_SRC_MOCS(gt->mocs.uc_index) |
+			 BLIT_CCTL_DST_MOCS(gt->mocs.uc_index));
+
+		src_pitch = t->width; /* in dwords */
+		if (src->tiling == CLIENT_TILING_4) {
+			src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR);
+			src_4t = XY_FAST_COPY_BLT_D1_SRC_TILE4;
+		} else if (src->tiling == CLIENT_TILING_Y) {
+			src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR);
+		} else if (src->tiling == CLIENT_TILING_X) {
+			src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(TILE_X);
+		} else {
+			src_pitch *= 4; /* in bytes */
+		}
 
-	dst_pitch = t->width * 4;
-	if (dst->tiling) {
-		cmd |= XY_SRC_COPY_BLT_DST_TILED;
-		dst_pitch /= 4;
-	}
+		dst_pitch = t->width; /* in dwords */
+		if (dst->tiling == CLIENT_TILING_4) {
+			dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR);
+			dst_4t = XY_FAST_COPY_BLT_D1_DST_TILE4;
+		} else if (dst->tiling == CLIENT_TILING_Y) {
+			dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR);
+		} else if (dst->tiling == CLIENT_TILING_X) {
+			dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(TILE_X);
+		} else {
+			dst_pitch *= 4; /* in bytes */
+		}
 
-	*cs++ = cmd;
-	*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
-	*cs++ = 0;
-	*cs++ = t->height << 16 | t->width;
-	*cs++ = lower_32_bits(dst->vma->node.start);
-	if (use_64b_reloc)
+		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2) |
+			src_tiles | dst_tiles;
+		*cs++ = src_4t | dst_4t | BLT_DEPTH_32 | dst_pitch;
+		*cs++ = 0;
+		*cs++ = t->height << 16 | t->width;
+		*cs++ = lower_32_bits(dst->vma->node.start);
 		*cs++ = upper_32_bits(dst->vma->node.start);
-	*cs++ = 0;
-	*cs++ = src_pitch;
-	*cs++ = lower_32_bits(src->vma->node.start);
-	if (use_64b_reloc)
+		*cs++ = 0;
+		*cs++ = src_pitch;
+		*cs++ = lower_32_bits(src->vma->node.start);
 		*cs++ = upper_32_bits(src->vma->node.start);
+	} else {
+		if (ver >= 6) {
+			*cs++ = MI_LOAD_REGISTER_IMM(1);
+			*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
+			cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
+			if (src->tiling == CLIENT_TILING_Y)
+				cmd |= BCS_SRC_Y;
+			if (dst->tiling == CLIENT_TILING_Y)
+				cmd |= BCS_DST_Y;
+			*cs++ = cmd;
+
+			cmd = MI_FLUSH_DW;
+			if (ver >= 8)
+				cmd++;
+			*cs++ = cmd;
+			*cs++ = 0;
+			*cs++ = 0;
+			*cs++ = 0;
+		}
+
+		cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
+		if (ver >= 8)
+			cmd += 2;
+
+		src_pitch = t->width * 4;
+		if (src->tiling) {
+			cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+			src_pitch /= 4;
+		}
+
+		dst_pitch = t->width * 4;
+		if (dst->tiling) {
+			cmd |= XY_SRC_COPY_BLT_DST_TILED;
+			dst_pitch /= 4;
+		}
+
+		*cs++ = cmd;
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
+		*cs++ = 0;
+		*cs++ = t->height << 16 | t->width;
+		*cs++ = lower_32_bits(dst->vma->node.start);
+		if (use_64b_reloc)
+			*cs++ = upper_32_bits(dst->vma->node.start);
+		*cs++ = 0;
+		*cs++ = src_pitch;
+		*cs++ = lower_32_bits(src->vma->node.start);
+		if (use_64b_reloc)
+			*cs++ = upper_32_bits(src->vma->node.start);
+	}
 
 	*cs++ = MI_BATCH_BUFFER_END;
 
@@ -181,7 +325,13 @@ static int tiled_blits_create_buffers(struct tiled_blits *t,
 
 		t->buffers[i].vma = vma;
 		t->buffers[i].tiling =
-			i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
+			i915_prandom_u32_max_state(CLIENT_NUM_TILING_TYPES, prng);
+
+		/* Platforms support either TileY or Tile4, not both */
+		if (HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_Y)
+			t->buffers[i].tiling = CLIENT_TILING_4;
+		else if (!HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_4)
+			t->buffers[i].tiling = CLIENT_TILING_Y;
 	}
 
 	return 0;
@@ -206,7 +356,8 @@ static u64 swizzle_bit(unsigned int bit, u64 offset)
 static u64 tiled_offset(const struct intel_gt *gt,
 			u64 v,
 			unsigned int stride,
-			enum client_tiling tiling)
+			enum client_tiling tiling,
+			int x_pos, int y_pos)
 {
 	unsigned int swizzle;
 	u64 x, y;
@@ -216,7 +367,12 @@ static u64 tiled_offset(const struct intel_gt *gt,
 
 	y = div64_u64_rem(v, stride, &x);
 
-	if (tiling == CLIENT_TILING_X) {
+	if (tiling == CLIENT_TILING_4) {
+		v = linear_x_y_to_ftiled_pos(x_pos, y_pos, stride, 32);
+
+		/* no swizzling for f-tiling */
+		swizzle = I915_BIT_6_SWIZZLE_NONE;
+	} else if (tiling == CLIENT_TILING_X) {
 		v = div64_u64_rem(y, 8, &y) * stride * 8;
 		v += y * 512;
 		v += div64_u64_rem(x, 512, &x) << 12;
@@ -259,6 +415,7 @@ static const char *repr_tiling(enum client_tiling tiling)
 	case CLIENT_TILING_LINEAR: return "linear";
 	case CLIENT_TILING_X: return "X";
 	case CLIENT_TILING_Y: return "Y";
+	case CLIENT_TILING_4: return "F";
 	default: return "unknown";
 	}
 }
@@ -284,7 +441,7 @@ static int verify_buffer(const struct tiled_blits *t,
 	} else {
 		u64 v = tiled_offset(buf->vma->vm->gt,
 				     p * 4, t->width * 4,
-				     buf->tiling);
+				     buf->tiling, x, y);
 
 		if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
 			ret = -EINVAL;
@@ -504,6 +661,9 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 	if (err)
 		return err;
 
+	/* Simulating GTT eviction of the same buffer / layout */
+	t->buffers[2].tiling = t->buffers[0].tiling;
+
 	/* Reposition so that we overlap the old addresses, and slightly off */
 	err = tiled_blit(t,
			 &t->buffers[2], t->hole + t->align,
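As a sanity check of linear_x_y_to_ftiled_pos() above, one can trace a pixel by hand. For x=5, y=7 with a 512-byte stride at 32bpp: the byte offset 20 lands in the first 4K tile (tile_base 0), subtile index 7/4*8 + 20/16 = 9, which the map swizzles to 5, giving pos = 5*64 + 3*16 + 4 = 372. A self-contained userspace mirror of the same math (a sketch for experimentation; constants copied from the selftest, values computed by hand so treat the assertion as illustrative):

	#include <assert.h>
	#include <stdio.h>

	#define OW_SIZE 16
	#define F_SUBTILE_SIZE 64
	#define F_TILE_WIDTH 128
	#define F_TILE_HEIGHT 32
	#define F_SUBTILE_WIDTH OW_SIZE
	#define F_SUBTILE_HEIGHT 4

	static const unsigned char f_subtile_map[] = {
		 0,  1,  2,  3,  8,  9, 10, 11,  4,  5,  6,  7, 12, 13, 14, 15,
		16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31,
		32, 33, 34, 35, 40, 41, 42, 43, 36, 37, 38, 39, 44, 45, 46, 47,
		48, 49, 50, 51, 56, 57, 58, 59, 52, 53, 54, 55, 60, 61, 62, 63
	};

	/* Same arithmetic as the kernel helper, in plain C. */
	static int ftiled_pos(int x, int y, unsigned int stride, int bpp)
	{
		int pixel_size = bpp / 8;
		int tile_base, tile_x, tile_y, subtile, pos;

		x *= pixel_size;
		tile_base = y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
			    x / F_TILE_WIDTH * 4096;
		tile_x = x % F_TILE_WIDTH;
		tile_y = y % F_TILE_HEIGHT;
		subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
		pos = tile_base + f_subtile_map[subtile] * F_SUBTILE_SIZE +
		      tile_y % F_SUBTILE_HEIGHT * OW_SIZE + tile_x % F_SUBTILE_WIDTH;

		return pos / pixel_size * 4;
	}

	int main(void)
	{
		assert(ftiled_pos(5, 7, 512, 32) == 372);
		printf("pos(5,7) = %d\n", ftiled_pos(5, 7, 512, 32));
		return 0;
	}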
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 93a67422ca3b..c6ad67b90e8a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -212,7 +212,7 @@ static int __live_parallel_switch1(void *data)
 			i915_request_add(rq);
 		}
 
-		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		if (i915_request_wait(rq, 0, HZ) < 0)
 			err = -ETIME;
 		i915_request_put(rq);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 801af51aff62..fe6c37fd7859 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -9,6 +9,7 @@
 
 #include "i915_deps.h"
 
+#include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
 
 static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
@@ -109,7 +110,8 @@ static int igt_same_create_migrate(void *arg)
 
 static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
 				  struct drm_i915_gem_object *obj,
-				  struct i915_vma *vma)
+				  struct i915_vma *vma,
+				  bool silent_migrate)
 {
 	int err;
 
@@ -138,7 +140,8 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
 	if (i915_gem_object_is_lmem(obj)) {
 		err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM);
 		if (err) {
-			pr_err("Object failed migration to smem\n");
+			if (!silent_migrate)
+				pr_err("Object failed migration to smem\n");
 			if (err)
 				return err;
 		}
@@ -156,7 +159,8 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
 	} else {
 		err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM_0);
 		if (err) {
-			pr_err("Object failed migration to lmem\n");
+			if (!silent_migrate)
+				pr_err("Object failed migration to lmem\n");
 			if (err)
 				return err;
 		}
@@ -179,7 +183,8 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt,
 				    struct i915_address_space *vm,
 				    struct i915_deps *deps,
 				    struct igt_spinner *spin,
-				    struct dma_fence *spin_fence)
+				    struct dma_fence *spin_fence,
+				    bool borked_migrate)
 {
 	struct drm_i915_private *i915 = gt->i915;
 	struct drm_i915_gem_object *obj;
@@ -242,7 +247,8 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt,
 	 */
 	for (i = 1; i <= 5; ++i) {
 		for_i915_gem_ww(&ww, err, true)
-			err = lmem_pages_migrate_one(&ww, obj, vma);
+			err = lmem_pages_migrate_one(&ww, obj, vma,
+						     borked_migrate);
 		if (err)
 			goto out_put;
 	}
@@ -283,23 +289,70 @@ out_put:
 
 static int igt_lmem_pages_failsafe_migrate(void *arg)
 {
-	int fail_gpu, fail_alloc, ret;
+	int fail_gpu, fail_alloc, ban_memcpy, ret;
 	struct intel_gt *gt = arg;
 
 	for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
 		for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
-			pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
-				fail_gpu, fail_alloc);
-			i915_ttm_migrate_set_failure_modes(fail_gpu,
-							   fail_alloc);
-			ret = __igt_lmem_pages_migrate(gt, NULL, NULL, NULL, NULL);
-			if (ret)
-				goto out_err;
+			for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) {
+				pr_info("Simulated failure modes: gpu: %d, alloc:%d, ban_memcpy: %d\n",
+					fail_gpu, fail_alloc, ban_memcpy);
+				i915_ttm_migrate_set_ban_memcpy(ban_memcpy);
+				i915_ttm_migrate_set_failure_modes(fail_gpu,
+								   fail_alloc);
+				ret = __igt_lmem_pages_migrate(gt, NULL, NULL,
							       NULL, NULL,
							       ban_memcpy &&
							       fail_gpu);
+
+				if (ban_memcpy && fail_gpu) {
+					struct intel_gt *__gt;
+					unsigned int id;
+
+					if (ret != -EIO) {
+						pr_err("expected -EIO, got (%d)\n", ret);
+						ret = -EINVAL;
+					} else {
+						ret = 0;
+					}
+
+					for_each_gt(__gt, gt->i915, id) {
+						intel_wakeref_t wakeref;
+						bool wedged;
+
+						mutex_lock(&__gt->reset.mutex);
+						wedged = test_bit(I915_WEDGED, &__gt->reset.flags);
+						mutex_unlock(&__gt->reset.mutex);
+
+						if (fail_gpu && !fail_alloc) {
+							if (!wedged) {
+								pr_err("gt(%u) not wedged\n", id);
+								ret = -EINVAL;
+								continue;
+							}
+						} else if (wedged) {
+							pr_err("gt(%u) incorrectly wedged\n", id);
+							ret = -EINVAL;
+						} else {
+							continue;
+						}
+
+						wakeref = intel_runtime_pm_get(__gt->uncore->rpm);
+						igt_global_reset_lock(__gt);
+						intel_gt_reset(__gt, ALL_ENGINES, NULL);
+						igt_global_reset_unlock(__gt);
+						intel_runtime_pm_put(__gt->uncore->rpm, wakeref);
+					}
+					if (ret)
+						goto out_err;
+				}
+			}
 		}
 	}
 
 out_err:
 	i915_ttm_migrate_set_failure_modes(false, false);
+	i915_ttm_migrate_set_ban_memcpy(false);
 	return ret;
 }
 
@@ -370,7 +423,7 @@ static int igt_async_migrate(struct intel_gt *gt)
 			goto out_ce;
 
 		err = __igt_lmem_pages_migrate(gt, &ppgtt->vm, &deps, &spin,
-					       spin_fence);
+					       spin_fence, false);
 		i915_deps_fini(&deps);
 		dma_fence_put(spin_fence);
 		if (err)
@@ -394,23 +447,67 @@ out_spin:
 #define ASYNC_FAIL_ALLOC 1
 static int igt_lmem_async_migrate(void *arg)
 {
-	int fail_gpu, fail_alloc, ret;
+	int fail_gpu, fail_alloc, ban_memcpy, ret;
 	struct intel_gt *gt = arg;
 
 	for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
 		for (fail_alloc = 0; fail_alloc < ASYNC_FAIL_ALLOC; ++fail_alloc) {
-			pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
-				fail_gpu, fail_alloc);
-			i915_ttm_migrate_set_failure_modes(fail_gpu,
-							   fail_alloc);
-			ret = igt_async_migrate(gt);
-			if (ret)
-				goto out_err;
+			for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) {
+				pr_info("Simulated failure modes: gpu: %d, alloc: %d, ban_memcpy: %d\n",
+					fail_gpu, fail_alloc, ban_memcpy);
+				i915_ttm_migrate_set_ban_memcpy(ban_memcpy);
+				i915_ttm_migrate_set_failure_modes(fail_gpu,
+								   fail_alloc);
+				ret = igt_async_migrate(gt);
+
+				if (fail_gpu && ban_memcpy) {
+					struct intel_gt *__gt;
+					unsigned int id;
+
+					if (ret != -EIO) {
+						pr_err("expected -EIO, got (%d)\n", ret);
+						ret = -EINVAL;
+					} else {
+						ret = 0;
+					}
+
+					for_each_gt(__gt, gt->i915, id) {
+						intel_wakeref_t wakeref;
+						bool wedged;
+
+						mutex_lock(&__gt->reset.mutex);
+						wedged = test_bit(I915_WEDGED, &__gt->reset.flags);
+						mutex_unlock(&__gt->reset.mutex);
+
+						if (fail_gpu && !fail_alloc) {
+							if (!wedged) {
+								pr_err("gt(%u) not wedged\n", id);
+								ret = -EINVAL;
+								continue;
+							}
+						} else if (wedged) {
+							pr_err("gt(%u) incorrectly wedged\n", id);
+							ret = -EINVAL;
+						} else {
+							continue;
+						}
+
+						wakeref = intel_runtime_pm_get(__gt->uncore->rpm);
+						igt_global_reset_lock(__gt);
+						intel_gt_reset(__gt, ALL_ENGINES, NULL);
+						igt_global_reset_unlock(__gt);
+						intel_runtime_pm_put(__gt->uncore->rpm, wakeref);
+					}
+				}
+				if (ret)
+					goto out_err;
+			}
 		}
 	}
 
 out_err:
 	i915_ttm_migrate_set_failure_modes(false, false);
+	i915_ttm_migrate_set_ban_memcpy(false);
 	return ret;
 }
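Both migrate selftests above, and the mman selftest below, end on the same epilogue: confirm each GT wedged under the simulated failure, then unwedge it with a full reset so later subtests start from a clean device. The recurring shape, factored out here for readability (a sketch; in-tree the sequence is open-coded per test):

	static int check_and_unwedge_gt(struct intel_gt *gt, unsigned int id)
	{
		intel_wakeref_t wakeref;
		bool wedged;

		mutex_lock(&gt->reset.mutex);
		wedged = test_bit(I915_WEDGED, &gt->reset.flags);
		mutex_unlock(&gt->reset.mutex);

		if (!wedged) {
			pr_err("gt(%u) not wedged\n", id);
			return -EINVAL;
		}

		/* Recover the device for whatever test runs next. */
		wakeref = intel_runtime_pm_get(gt->uncore->rpm);
		igt_global_reset_lock(gt);
		intel_gt_reset(gt, ALL_ENGINES, NULL);
		igt_global_reset_unlock(gt);
		intel_runtime_pm_put(gt->uncore->rpm, wakeref);

		return 0;
	}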
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..3ced9948a331 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -10,6 +10,7 @@
 #include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
@@ -21,6 +22,7 @@
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
 #include "selftests/igt_mmap.h"
 
 struct tile {
@@ -979,6 +981,9 @@ static int igt_mmap(void *arg)
 		};
 		int i;
 
+		if (mr->private)
+			continue;
+
 		for (i = 0; i < ARRAY_SIZE(sizes); i++) {
 			struct drm_i915_gem_object *obj;
 			int err;
@@ -1160,6 +1165,7 @@ out_unmap:
 #define IGT_MMAP_MIGRATE_FILL (1 << 1)
 #define IGT_MMAP_MIGRATE_EVICTABLE (1 << 2)
 #define IGT_MMAP_MIGRATE_UNFAULTABLE (1 << 3)
+#define IGT_MMAP_MIGRATE_FAIL_GPU (1 << 4)
 static int __igt_mmap_migrate(struct intel_memory_region **placements,
 			      int n_placements,
 			      struct intel_memory_region *expected_mr,
@@ -1221,8 +1227,10 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
 					  expand32(POISON_INUSE), &rq);
 	i915_gem_object_unpin_pages(obj);
 	if (rq) {
-		dma_resv_add_fence(obj->base.resv, &rq->fence,
-				   DMA_RESV_USAGE_KERNEL);
+		err = dma_resv_reserve_fences(obj->base.resv, 1);
+		if (!err)
+			dma_resv_add_fence(obj->base.resv, &rq->fence,
+					   DMA_RESV_USAGE_KERNEL);
 		i915_request_put(rq);
 	}
 	i915_gem_object_unlock(obj);
@@ -1232,13 +1240,62 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
 	if (flags & IGT_MMAP_MIGRATE_EVICTABLE)
 		igt_make_evictable(&objects);
 
+	if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) {
+		err = i915_gem_object_lock(obj, NULL);
+		if (err)
+			goto out_put;
+
+		/*
+		 * Ensure we only simulate the gpu failuire when faulting the
+		 * pages.
+		 */
+		err = i915_gem_object_wait_moving_fence(obj, true);
+		i915_gem_object_unlock(obj);
+		if (err)
+			goto out_put;
+		i915_ttm_migrate_set_failure_modes(true, false);
+	}
+
 	err = ___igt_mmap_migrate(i915, obj, addr,
 				  flags & IGT_MMAP_MIGRATE_UNFAULTABLE);
+
 	if (!err && obj->mm.region != expected_mr) {
 		pr_err("%s region mismatch %s\n", __func__, expected_mr->name);
 		err = -EINVAL;
 	}
 
+	if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) {
+		struct intel_gt *gt;
+		unsigned int id;
+
+		i915_ttm_migrate_set_failure_modes(false, false);
+
+		for_each_gt(gt, i915, id) {
+			intel_wakeref_t wakeref;
+			bool wedged;
+
+			mutex_lock(&gt->reset.mutex);
+			wedged = test_bit(I915_WEDGED, &gt->reset.flags);
+			mutex_unlock(&gt->reset.mutex);
+			if (!wedged) {
+				pr_err("gt(%u) not wedged\n", id);
+				err = -EINVAL;
+				continue;
+			}
+
+			wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+			igt_global_reset_lock(gt);
+			intel_gt_reset(gt, ALL_ENGINES, NULL);
+			igt_global_reset_unlock(gt);
+			intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+		}
+
+		if (!i915_gem_object_has_unknown_state(obj)) {
+			pr_err("object missing unknown_state\n");
+			err = -EINVAL;
+		}
+	}
+
 out_put:
 	i915_gem_object_put(obj);
 	igt_close_objects(i915, &objects);
@@ -1319,6 +1376,23 @@ static int igt_mmap_migrate(void *arg)
 					 IGT_MMAP_MIGRATE_TOPDOWN |
 					 IGT_MMAP_MIGRATE_FILL |
 					 IGT_MMAP_MIGRATE_UNFAULTABLE);
+		if (err)
+			goto out_io_size;
+
+		/*
+		 * Allocate in the non-mappable portion, but force migrating to
+		 * the mappable portion on fault (LMEM -> LMEM). We then also
+		 * simulate a gpu error when moving the pages when faulting the
+		 * pages, which should result in wedging the gpu and returning
+		 * SIGBUS in the fault handler, since we can't fallback to
+		 * memcpy.
+		 */
+		err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
+					 IGT_MMAP_MIGRATE_TOPDOWN |
+					 IGT_MMAP_MIGRATE_FILL |
+					 IGT_MMAP_MIGRATE_EVICTABLE |
+					 IGT_MMAP_MIGRATE_FAIL_GPU |
+					 IGT_MMAP_MIGRATE_UNFAULTABLE);
 out_io_size:
 		mr->io_size = saved_io_size;
 		i915_ttm_buddy_man_force_visible_size(man,
@@ -1435,6 +1509,9 @@ static int igt_mmap_access(void *arg)
 		struct drm_i915_gem_object *obj;
 		int err;
 
+		if (mr->private)
+			continue;
+
 		obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
 		if (obj == ERR_PTR(-ENODEV))
 			continue;
@@ -1580,6 +1657,9 @@ static int igt_mmap_gpu(void *arg)
 		struct drm_i915_gem_object *obj;
 		int err;
 
+		if (mr->private)
+			continue;
+
 		obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
 		if (obj == ERR_PTR(-ENODEV))
 			continue;
@@ -1727,6 +1807,9 @@ static int igt_mmap_revoke(void *arg)
 		struct drm_i915_gem_object *obj;
 		int err;
 
+		if (mr->private)
+			continue;
+
 		obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
 		if (obj == ERR_PTR(-ENODEV))
 			continue;