From c865204e84a1a5c35e055b45971524efe4616e31 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 15 Jun 2021 14:24:08 +0200 Subject: drm/i915/ttm: Fix memory leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two memory leaks introduced with the ttm backend. Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210615122408.32347-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index bf33724bed5c..cdf36d3cf02a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -119,6 +119,7 @@ static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); ttm_tt_destroy_common(bdev, ttm); + ttm_tt_fini(ttm); kfree(i915_tt); } @@ -214,6 +215,7 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) if (likely(obj)) { /* This releases all gem object bindings to the backend. */ + i915_ttm_free_cached_io_st(obj); __i915_gem_free_object(obj); } } -- cgit v1.2.3 From 38f28c0695c0413b701f67105bff2573c667492a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 16 Jun 2021 16:24:57 +0100 Subject: drm/i915/ttm: Calculate the object placement at get_pages time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of relying on a static placement, calculate at get_pages() time. This should work for LMEM regions and system for now. For stolen we need to take preallocated range into account. That will if needed be added later. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 93 ++++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_region_ttm.c | 8 ++- drivers/gpu/drm/i915/intel_region_ttm.h | 2 + 3 files changed, 77 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index cdf36d3cf02a..d7595688e182 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -24,6 +24,11 @@ #define I915_TTM_PRIO_NO_PAGES 1 #define I915_TTM_PRIO_HAS_PAGES 2 +/* + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs + */ +#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN + /** * struct i915_ttm_tt - TTM page vector with additional private information * @ttm: The base TTM page vector. 
@@ -42,36 +47,71 @@ struct i915_ttm_tt { struct sg_table *cached_st; }; -static const struct ttm_place lmem0_sys_placement_flags[] = { - { - .fpfn = 0, - .lpfn = 0, - .mem_type = I915_PL_LMEM0, - .flags = 0, - }, { - .fpfn = 0, - .lpfn = 0, - .mem_type = I915_PL_SYSTEM, - .flags = 0, - } -}; - -static struct ttm_placement i915_lmem0_placement = { - .num_placement = 1, - .placement = &lmem0_sys_placement_flags[0], - .num_busy_placement = 1, - .busy_placement = &lmem0_sys_placement_flags[0], +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, }; static struct ttm_placement i915_sys_placement = { .num_placement = 1, - .placement = &lmem0_sys_placement_flags[1], + .placement = &sys_placement_flags, .num_busy_placement = 1, - .busy_placement = &lmem0_sys_placement_flags[1], + .busy_placement = &sys_placement_flags, }; static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +static enum ttm_caching +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) +{ + /* + * Objects only allowed in system get cached cpu-mappings. + * Other objects get WC mapping for now. Even if in system. + */ + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && + obj->mm.n_placements <= 1) + return ttm_cached; + + return ttm_write_combined; +} + +static void +i915_ttm_place_from_region(const struct intel_memory_region *mr, + struct ttm_place *place) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = intel_region_to_ttm_type(mr); +} + +static void +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, + struct ttm_place *requested, + struct ttm_place *busy, + struct ttm_placement *placement) +{ + unsigned int num_allowed = obj->mm.n_placements; + unsigned int i; + + placement->num_placement = 1; + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : + obj->mm.region, requested); + + /* Cache this on object? */ + placement->num_busy_placement = num_allowed; + for (i = 0; i < placement->num_busy_placement; ++i) + i915_ttm_place_from_region(obj->mm.placements[i], busy + i); + + if (num_allowed == 0) { + *busy = *requested; + placement->num_busy_placement = 1; + } + + placement->placement = requested; + placement->busy_placement = busy; +} + static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { @@ -89,7 +129,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, man->use_tt) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, ttm_write_combined); + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, + i915_ttm_select_tt_caching(obj)); if (ret) { kfree(i915_tt); return NULL; @@ -416,10 +457,15 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) .no_wait_gpu = false, }; struct sg_table *st; + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; int ret; + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); + /* Move to the requested placement. */ - ret = ttm_bo_validate(bo, &i915_lmem0_placement, &ctx); + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + ret = ttm_bo_validate(bo, &placement, &ctx); if (ret) return ret == -ENOSPC ? 
-ENXIO : ret; @@ -625,7 +671,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); - bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : ttm_bo_type_kernel; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 27fe0668d094..5a664f6cc93f 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -50,12 +50,16 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) * driver-private types for now, reserving TTM_PL_VRAM for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ -static int intel_region_to_ttm_type(struct intel_memory_region *mem) +int intel_region_to_ttm_type(const struct intel_memory_region *mem) { int type; GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && - mem->type != INTEL_MEMORY_MOCK); + mem->type != INTEL_MEMORY_MOCK && + mem->type != INTEL_MEMORY_SYSTEM); + + if (mem->type == INTEL_MEMORY_SYSTEM) + return TTM_PL_SYSTEM; type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index e8cf830fda6f..649491844e79 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -28,6 +28,8 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, void intel_region_ttm_node_free(struct intel_memory_region *mem, struct ttm_resource *node); +int intel_region_to_ttm_type(const struct intel_memory_region *mem); + struct ttm_device_funcs *i915_ttm_driver(void); #ifdef CONFIG_DRM_I915_SELFTEST -- cgit v1.2.3 From beb6a22911ff6f7e933670b43e4bda5be56bd8f9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:24:58 +0100 Subject: drm/i915/ttm: pass along the I915_BO_ALLOC_CONTIGUOUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we just ignore the I915_BO_ALLOC_CONTIGUOUS flag, which is fine since everything is already contiguous with the ttm range manager. However in the next patch we want to switch over to the ttm buddy manager, where allocations are by default not contiguous. 
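As a minimal illustration of what forwarding the flag amounts to (mirroring the helper change in the diff below; the standalone function name here is only illustrative), the GEM allocation flag is translated into TTM's contiguous placement hint when the ttm_place is built:

        static void
        example_place_from_region(const struct intel_memory_region *mr,
                                  struct ttm_place *place, unsigned int flags)
        {
                memset(place, 0, sizeof(*place));
                place->mem_type = intel_region_to_ttm_type(mr);

                /* Forward the GEM flag as TTM's contiguous hint. */
                if (flags & I915_BO_ALLOC_CONTIGUOUS)
                        place->flags = TTM_PL_FLAG_CONTIGUOUS;
        }

With the range manager this hint was effectively a no-op, since every allocation was contiguous anyway; with the buddy manager it is what actually requests a contiguous block.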
v2(Thomas): - Forward ALLOC_CONTIG for all regions Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-4-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index d7595688e182..90d342e3df24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -79,10 +79,14 @@ i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) static void i915_ttm_place_from_region(const struct intel_memory_region *mr, - struct ttm_place *place) + struct ttm_place *place, + unsigned int flags) { memset(place, 0, sizeof(*place)); place->mem_type = intel_region_to_ttm_type(mr); + + if (flags & I915_BO_ALLOC_CONTIGUOUS) + place->flags = TTM_PL_FLAG_CONTIGUOUS; } static void @@ -92,16 +96,17 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, struct ttm_placement *placement) { unsigned int num_allowed = obj->mm.n_placements; + unsigned int flags = obj->flags; unsigned int i; placement->num_placement = 1; i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : - obj->mm.region, requested); + obj->mm.region, requested, flags); /* Cache this on object? */ placement->num_busy_placement = num_allowed; for (i = 0; i < placement->num_busy_placement; ++i) - i915_ttm_place_from_region(obj->mm.placements[i], busy + i); + i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags); if (num_allowed == 0) { *busy = *requested; -- cgit v1.2.3 From 687c7d0fcf8014a006416d7dc7474a101a85bf00 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:24:59 +0100 Subject: drm/i915/ttm: remove node usage in our naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that ttm_resource_manager just returns a generic ttm_resource we don't need to reference the mm_node stuff anymore which mostly only makes sense for drm_mm_node. In the next few patches we want switch over to the ttm_buddy_man which is just another type of ttm_resource so reflect that in the naming. Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-5-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 5 ++-- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/intel_region_ttm.c | 30 ++++++++++++------------ drivers/gpu/drm/i915/intel_region_ttm.h | 14 +++++------ drivers/gpu/drm/i915/selftests/mock_region.c | 16 ++++++------- 5 files changed, 34 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2a23b77424b3..3a2d9ecf8e03 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -265,9 +265,10 @@ struct drm_i915_gem_object { struct intel_memory_region *region; /** - * Memory manager node allocated for this object. + * Memory manager resource allocated for this object. Only + * needed for the mock region. 
*/ - void *st_mm_node; + struct ttm_resource *res; /** * Element within memory_region->objects or region->purgeable diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 90d342e3df24..20119ea9c418 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -327,7 +327,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, if (man->use_tt) return i915_ttm_tt_get_st(bo->ttm); - return intel_region_ttm_node_to_st(obj->mm.region, res); + return intel_region_ttm_resource_to_st(obj->mm.region, res); } static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 5a664f6cc93f..f9d616544728 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -68,7 +68,7 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) } static struct ttm_resource * -intel_region_ttm_node_reserve(struct intel_memory_region *mem, +intel_region_ttm_resource_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size) { @@ -100,12 +100,12 @@ intel_region_ttm_node_reserve(struct intel_memory_region *mem, } /** - * intel_region_ttm_node_free - Free a node allocated from a resource manager - * @mem: The region the node was allocated from. - * @node: The opaque node representing an allocation. + * intel_region_ttm_resource_free - Free a resource allocated from a resource manager + * @mem: The region the resource was allocated from. + * @res: The opaque resource representing an allocation. */ -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *res) +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res) { struct ttm_resource_manager *man = mem->region_private; @@ -113,8 +113,8 @@ void intel_region_ttm_node_free(struct intel_memory_region *mem, } static const struct intel_memory_region_private_ops priv_ops = { - .reserve = intel_region_ttm_node_reserve, - .free = intel_region_ttm_node_free, + .reserve = intel_region_ttm_resource_reserve, + .free = intel_region_ttm_resource_free, }; int intel_region_ttm_init(struct intel_memory_region *mem) @@ -157,10 +157,10 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) } /** - * intel_region_ttm_node_to_st - Convert an opaque TTM resource manager node + * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource * to an sg_table. * @mem: The memory region. - * @node: The resource manager node obtained from the TTM resource manager. + * @res: The resource manager resource obtained from the TTM resource manager. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -168,8 +168,8 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) * * Return: A malloced sg_table on success, an error pointer on failure. */ -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res) +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res) { struct ttm_range_mgr_node *range_node = container_of(res, typeof(*range_node), base); @@ -196,9 +196,9 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, * Return: A valid pointer on success, an error pointer on failure. 
*/ struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { struct ttm_resource_manager *man = mem->region_private; struct ttm_place place = {}; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index 649491844e79..6f44075920f2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -22,11 +22,11 @@ int intel_region_ttm_init(struct intel_memory_region *mem); void intel_region_ttm_fini(struct intel_memory_region *mem); -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res); +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res); -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *node); +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res); int intel_region_to_ttm_type(const struct intel_memory_region *mem); @@ -34,8 +34,8 @@ struct ttm_device_funcs *i915_ttm_driver(void); #ifdef CONFIG_DRM_I915_SELFTEST struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); #endif #endif /* _INTEL_REGION_TTM_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index eafc5a04975c..6120d43fe504 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -16,7 +16,7 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); sg_free_table(pages); kfree(pages); } @@ -30,15 +30,15 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) flags |= I915_ALLOC_CONTIGUOUS; - obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, - obj->base.size, - flags); - if (IS_ERR(obj->mm.st_mm_node)) - return PTR_ERR(obj->mm.st_mm_node); + obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, + obj->base.size, + flags); + if (IS_ERR(obj->mm.res)) + return PTR_ERR(obj->mm.res); - pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); + pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); if (IS_ERR(pages)) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); return PTR_ERR(pages); } -- cgit v1.2.3 From d53ec322dc7de32a59bf1c2a56b93e90fc2f1c28 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:25:00 +0100 Subject: drm/i915/ttm: switch over to ttm_buddy_man MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move back to the buddy allocator for managing device local memory, and restore the lost mock selftests. Keep around the range manager related bits, since we likely need this for managing stolen at some point. For stolen we also don't need to reserve anything so no need to support a generic reserve interface. 
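With the buddy manager the priv_ops reserve/free indirection and the region's reserved list go away; reserving a range becomes a single call into the manager. A minimal sketch of the resulting path (mirroring intel_memory_region_reserve() in the diff below; the wrapper name is only illustrative):

        int example_reserve_range(struct intel_memory_region *mem,
                                  resource_size_t offset, resource_size_t size)
        {
                struct ttm_resource_manager *man = mem->region_private;

                /* Only the buddy manager supports reservations here. */
                GEM_BUG_ON(mem->is_range_manager);
                return i915_ttm_buddy_man_reserve(man, offset, size);
        }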
v2(Thomas): - bo->page_alignment is in page units, not bytes Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-6-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 20 +-- drivers/gpu/drm/i915/intel_memory_region.c | 55 +------ drivers/gpu/drm/i915/intel_memory_region.h | 17 --- drivers/gpu/drm/i915/intel_region_ttm.c | 122 ++++++--------- .../gpu/drm/i915/selftests/intel_memory_region.c | 170 ++++++++++++++------- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 6 files changed, 180 insertions(+), 216 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 20119ea9c418..b8739f3d3462 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -175,11 +175,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); /* Will do for now. Our pinned objects are still on TTM's LRU lists */ - if (!i915_gem_object_evictable(obj)) - return false; - - /* This isn't valid with a buddy allocator */ - return ttm_bo_eviction_valuable(bo, place); + return i915_gem_object_evictable(obj); } static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, @@ -654,20 +650,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; enum ttm_bo_type bo_type; - size_t alignment = 0; int ret; - /* Adjust alignment to GPU- and CPU huge page sizes. */ - - if (mem->is_range_manager) { - if (size >= SZ_1G) - alignment = SZ_1G >> PAGE_SHIFT; - else if (size >= SZ_2M) - alignment = SZ_2M >> PAGE_SHIFT; - else if (size >= SZ_64K) - alignment = SZ_64K >> PAGE_SHIFT; - } - drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); i915_gem_object_init_memory_region(obj, mem); @@ -688,7 +672,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); ret = ttm_bo_init(&i915->bdev, i915_gem_to_ttm(obj), size, - bo_type, &i915_sys_placement, alignment, + bo_type, &i915_sys_placement, 1, true, NULL, NULL, i915_ttm_bo_destroy); if (!ret) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 12fb5423fd5e..df59f884d37c 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -5,6 +5,7 @@ #include "intel_memory_region.h" #include "i915_drv.h" +#include "i915_ttm_buddy_manager.h" static const struct { u16 class; @@ -28,11 +29,6 @@ static const struct { }, }; -struct intel_region_reserve { - struct list_head link; - struct ttm_resource *res; -}; - struct intel_memory_region * intel_memory_region_lookup(struct drm_i915_private *i915, u16 class, u16 instance) @@ -63,27 +59,6 @@ intel_memory_region_by_type(struct drm_i915_private *i915, return NULL; } -/** - * intel_memory_region_unreserve - Unreserve all previously reserved - * ranges - * @mem: The region containing the reserved ranges. 
- */ -void intel_memory_region_unreserve(struct intel_memory_region *mem) -{ - struct intel_region_reserve *reserve, *next; - - if (!mem->priv_ops || !mem->priv_ops->free) - return; - - mutex_lock(&mem->mm_lock); - list_for_each_entry_safe(reserve, next, &mem->reserved, link) { - list_del(&reserve->link); - mem->priv_ops->free(mem, reserve->res); - kfree(reserve); - } - mutex_unlock(&mem->mm_lock); -} - /** * intel_memory_region_reserve - Reserve a memory range * @mem: The region for which we want to reserve a range. @@ -96,28 +71,11 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size) { - int ret; - struct intel_region_reserve *reserve; - - if (!mem->priv_ops || !mem->priv_ops->reserve) - return -EINVAL; - - reserve = kzalloc(sizeof(*reserve), GFP_KERNEL); - if (!reserve) - return -ENOMEM; + struct ttm_resource_manager *man = mem->region_private; - reserve->res = mem->priv_ops->reserve(mem, offset, size); - if (IS_ERR(reserve->res)) { - ret = PTR_ERR(reserve->res); - kfree(reserve); - return ret; - } - - mutex_lock(&mem->mm_lock); - list_add_tail(&reserve->link, &mem->reserved); - mutex_unlock(&mem->mm_lock); + GEM_BUG_ON(mem->is_range_manager); - return 0; + return i915_ttm_buddy_man_reserve(man, offset, size); } struct intel_memory_region * @@ -149,9 +107,6 @@ intel_memory_region_create(struct drm_i915_private *i915, mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); - INIT_LIST_HEAD(&mem->reserved); - - mutex_init(&mem->mm_lock); if (ops->init) { err = ops->init(mem); @@ -182,11 +137,9 @@ static void __intel_memory_region_destroy(struct kref *kref) struct intel_memory_region *mem = container_of(kref, typeof(*mem), kref); - intel_memory_region_unreserve(mem); if (mem->ops->release) mem->ops->release(mem); - mutex_destroy(&mem->mm_lock); mutex_destroy(&mem->objects.lock); kfree(mem); } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index c7e635d62e1a..b04fb22726d9 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -59,19 +59,10 @@ struct intel_memory_region_ops { unsigned int flags); }; -struct intel_memory_region_private_ops { - struct ttm_resource *(*reserve)(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size); - void (*free)(struct intel_memory_region *mem, - struct ttm_resource *res); -}; - struct intel_memory_region { struct drm_i915_private *i915; const struct intel_memory_region_ops *ops; - const struct intel_memory_region_private_ops *priv_ops; struct io_mapping iomap; struct resource region; @@ -79,8 +70,6 @@ struct intel_memory_region { /* For fake LMEM */ struct drm_mm_node fake_mappable; - struct mutex mm_lock; - struct kref kref; resource_size_t io_start; @@ -94,8 +83,6 @@ struct intel_memory_region { char name[16]; bool private; /* not for userspace */ - struct list_head reserved; - dma_addr_t remap_addr; struct { @@ -103,8 +90,6 @@ struct intel_memory_region { struct list_head list; } objects; - size_t chunk_size; - unsigned int max_order; bool is_range_manager; void *region_private; @@ -138,8 +123,6 @@ __printf(2, 3) void intel_memory_region_set_name(struct intel_memory_region *mem, const char *fmt, ...); -void intel_memory_region_unreserve(struct intel_memory_region *mem); - int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c index f9d616544728..052253c81e98 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -8,6 +8,7 @@ #include "i915_drv.h" #include "i915_scatterlist.h" +#include "i915_ttm_buddy_manager.h" #include "intel_region_ttm.h" @@ -67,72 +68,28 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) return type; } -static struct ttm_resource * -intel_region_ttm_resource_reserve(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size) -{ - struct ttm_resource_manager *man = mem->region_private; - struct ttm_place place = {}; - struct ttm_buffer_object mock_bo = {}; - struct ttm_resource *res; - int ret; - - /* - * Having to use a mock_bo is unfortunate but stems from some - * drivers having private managers that insist to know what the - * allocate memory is intended for, using it to send private - * data to the manager. Also recently the bo has been used to send - * alignment info to the manager. Assume that apart from the latter, - * none of the managers we use will ever access the buffer object - * members, hoping we can pass the alignment info in the - * struct ttm_place in the future. - */ - - place.fpfn = offset >> PAGE_SHIFT; - place.lpfn = place.fpfn + (size >> PAGE_SHIFT); - mock_bo.base.size = size; - ret = man->func->alloc(man, &mock_bo, &place, &res); - if (ret == -ENOSPC) - ret = -ENXIO; - - return ret ? ERR_PTR(ret) : res; -} - /** - * intel_region_ttm_resource_free - Free a resource allocated from a resource manager - * @mem: The region the resource was allocated from. - * @res: The opaque resource representing an allocation. + * intel_region_ttm_init - Initialize a memory region for TTM. + * @mem: The region to initialize. + * + * This function initializes a suitable TTM resource manager for the + * region, and if it's a LMEM region type, attaches it to the TTM + * device. MOCK regions are NOT attached to the TTM device, since we don't + * have one for the mock selftests. + * + * Return: 0 on success, negative error code on failure. 
*/ -void intel_region_ttm_resource_free(struct intel_memory_region *mem, - struct ttm_resource *res) -{ - struct ttm_resource_manager *man = mem->region_private; - - man->func->free(man, res); -} - -static const struct intel_memory_region_private_ops priv_ops = { - .reserve = intel_region_ttm_resource_reserve, - .free = intel_region_ttm_resource_free, -}; - int intel_region_ttm_init(struct intel_memory_region *mem) { struct ttm_device *bdev = &mem->i915->bdev; int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = ttm_range_man_init(bdev, mem_type, false, - resource_size(&mem->region) >> PAGE_SHIFT); + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(&mem->region), PAGE_SIZE); if (ret) return ret; - mem->chunk_size = PAGE_SIZE; - mem->max_order = - get_order(rounddown_pow_of_two(resource_size(&mem->region))); - mem->is_range_manager = true; - mem->priv_ops = &priv_ops; mem->region_private = ttm_manager_type(bdev, mem_type); return 0; @@ -150,8 +107,8 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) { int ret; - ret = ttm_range_man_fini(&mem->i915->bdev, - intel_region_to_ttm_type(mem)); + ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, + intel_region_to_ttm_type(mem)); GEM_WARN_ON(ret); mem->region_private = NULL; } @@ -171,12 +128,15 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, struct ttm_resource *res) { - struct ttm_range_mgr_node *range_node = - container_of(res, typeof(*range_node), base); + if (mem->is_range_manager) { + struct ttm_range_mgr_node *range_node = + to_ttm_range_mgr_node(res); - GEM_WARN_ON(!mem->is_range_manager); - return i915_sg_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + return i915_sg_from_mm_node(&range_node->mm_nodes[0], + mem->region.start); + } else { + return i915_sg_from_buddy_resource(res, mem->region.start); + } } #ifdef CONFIG_DRM_I915_SELFTEST @@ -206,25 +166,35 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, struct ttm_resource *res; int ret; - /* - * We ignore the flags for now since we're using the range - * manager and contigous and min page size would be fulfilled - * by default if size is min page size aligned. - */ mock_bo.base.size = size; - - if (mem->is_range_manager) { - if (size >= SZ_1G) - mock_bo.page_alignment = SZ_1G >> PAGE_SHIFT; - else if (size >= SZ_2M) - mock_bo.page_alignment = SZ_2M >> PAGE_SHIFT; - else if (size >= SZ_64K) - mock_bo.page_alignment = SZ_64K >> PAGE_SHIFT; - } + mock_bo.page_alignment = 1; + place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); if (ret == -ENOSPC) ret = -ENXIO; return ret ? ERR_PTR(ret) : res; } + #endif + +void intel_region_ttm_node_free(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man = mem->region_private; + + man->func->free(man, res); +} + +/** + * intel_region_ttm_resource_free - Free a resource allocated from a resource manager + * @mem: The region the resource was allocated from. + * @res: The opaque resource representing an allocation. 
+ */ +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man = mem->region_private; + + man->func->free(man, res); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index c85d516b85cd..118a66c29695 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -20,7 +20,9 @@ #include "gem/selftests/mock_context.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" +#include "i915_buddy.h" #include "i915_memcpy.h" +#include "i915_ttm_buddy_manager.h" #include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" @@ -57,10 +59,9 @@ static int igt_mock_fill(void *arg) LIST_HEAD(objects); int err = 0; - page_size = mem->chunk_size; + page_size = PAGE_SIZE; + max_pages = div64_u64(total, page_size); rem = total; -retry: - max_pages = div64_u64(rem, page_size); for_each_prime_number_from(page_num, 1, max_pages) { resource_size_t size = page_num * page_size; @@ -86,11 +87,6 @@ retry: err = 0; if (err == -ENXIO) { if (page_num * page_size <= rem) { - if (mem->is_range_manager && max_pages > 1) { - max_pages >>= 1; - goto retry; - } - pr_err("%s failed, space still left in region\n", __func__); err = -EINVAL; @@ -157,6 +153,7 @@ static bool is_contiguous(struct drm_i915_gem_object *obj) static int igt_mock_reserve(void *arg) { struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; resource_size_t avail = resource_size(&mem->region); struct drm_i915_gem_object *obj; const u32 chunk_size = SZ_32M; @@ -166,16 +163,18 @@ static int igt_mock_reserve(void *arg) LIST_HEAD(objects); int err = 0; - if (!list_empty(&mem->reserved)) { - pr_err("%s region reserved list is not empty\n", __func__); - return -EINVAL; - } - count = avail / chunk_size; order = i915_random_order(count, &prng); if (!order) return 0; + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_close; + } + /* Reserve a bunch of ranges within the region */ for (i = 0; i < count; ++i) { u64 start = order[i] * chunk_size; @@ -205,18 +204,12 @@ static int igt_mock_reserve(void *arg) do { u32 size = i915_prandom_u32_max_state(cur_avail, &prng); -retry: size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { - if (PTR_ERR(obj) == -ENXIO) { - if (mem->is_range_manager && - size > mem->chunk_size) { - size >>= 1; - goto retry; - } + if (PTR_ERR(obj) == -ENXIO) break; - } + err = PTR_ERR(obj); goto out_close; } @@ -232,7 +225,7 @@ retry: out_close: kfree(order); close_objects(mem, &objects); - intel_memory_region_unreserve(mem); + intel_memory_region_put(mem); return err; } @@ -252,7 +245,7 @@ static int igt_mock_contiguous(void *arg) total = resource_size(&mem->region); /* Min size */ - obj = igt_object_create(mem, &objects, mem->chunk_size, + obj = igt_object_create(mem, &objects, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -333,17 +326,15 @@ static int igt_mock_contiguous(void *arg) min = target; target = total >> 1; - if (!mem->is_range_manager) { - /* Make sure we can still allocate all the fragmented space */ - obj = igt_object_create(mem, &objects, target, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_close_objects; - } - - igt_object_release(obj); + /* 
Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; } + igt_object_release(obj); + /* * Even though we have enough free space, we don't have a big enough * contiguous block. Make sure that holds true. @@ -362,7 +353,7 @@ static int igt_mock_contiguous(void *arg) } target >>= 1; - } while (target >= mem->chunk_size); + } while (target >= PAGE_SIZE); err_close_objects: list_splice_tail(&holes, &objects); @@ -374,7 +365,9 @@ static int igt_mock_splintered_region(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; + struct i915_buddy_mm *mm; unsigned int expected_order; LIST_HEAD(objects); u64 size; @@ -382,7 +375,7 @@ static int igt_mock_splintered_region(void *arg) /* * Sanity check we can still allocate everything even if the - * max_order != mm.size. i.e our starting address space size is not a + * mm.max_order != mm.size. i.e our starting address space size is not a * power-of-two. */ @@ -391,20 +384,29 @@ static int igt_mock_splintered_region(void *arg) if (IS_ERR(mem)) return PTR_ERR(mem); - expected_order = get_order(rounddown_pow_of_two(size)); - if (mem->max_order != expected_order) { - pr_err("%s order mismatch(%u != %u)\n", - __func__, mem->max_order, expected_order); - err = -EINVAL; - goto out_put; - } - obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_close; } + res = to_ttm_buddy_resource(obj->mm.res); + mm = res->mm; + if (mm->size != size) { + pr_err("%s size mismatch(%llu != %llu)\n", + __func__, mm->size, size); + err = -EINVAL; + goto out_put; + } + + expected_order = get_order(rounddown_pow_of_two(size)); + if (mm->max_order != expected_order) { + pr_err("%s order mismatch(%u != %u)\n", + __func__, mm->max_order, expected_order); + err = -EINVAL; + goto out_put; + } + close_objects(mem, &objects); /* @@ -415,15 +417,12 @@ static int igt_mock_splintered_region(void *arg) * sure that does indeed hold true. */ - if (!mem->is_range_manager) { - obj = igt_object_create(mem, &objects, size, - I915_BO_ALLOC_CONTIGUOUS); - if (!IS_ERR(obj)) { - pr_err("%s too large contiguous allocation was not rejected\n", - __func__); - err = -EINVAL; - goto out_close; - } + obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); + if (!IS_ERR(obj)) { + pr_err("%s too large contiguous allocation was not rejected\n", + __func__); + err = -EINVAL; + goto out_close; } obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), @@ -442,6 +441,74 @@ out_put: return err; } +#ifndef SZ_8G +#define SZ_8G BIT_ULL(33) +#endif + +static int igt_mock_max_segment(void *arg) +{ + const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); + struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; + struct drm_i915_gem_object *obj; + struct i915_buddy_block *block; + struct i915_buddy_mm *mm; + struct list_head *blocks; + struct scatterlist *sg; + LIST_HEAD(objects); + u64 size; + int err = 0; + + /* + * While we may create very large contiguous blocks, we may need + * to break those down for consumption elsewhere. In particular, + * dma-mapping with scatterlist elements have an implicit limit of + * UINT_MAX on each element. 
+ */ + + size = SZ_8G; + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + obj = igt_object_create(mem, &objects, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_put; + } + + res = to_ttm_buddy_resource(obj->mm.res); + blocks = &res->blocks; + mm = res->mm; + size = 0; + list_for_each_entry(block, blocks, link) { + if (i915_buddy_block_size(mm, block) > size) + size = i915_buddy_block_size(mm, block); + } + if (size < max_segment) { + pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", + __func__, max_segment, size); + err = -EINVAL; + goto out_close; + } + + for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + if (sg->length > max_segment) { + pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", + __func__, sg->length, max_segment); + err = -EINVAL; + goto out_close; + } + } + +out_close: + close_objects(mem, &objects); +out_put: + intel_memory_region_put(mem); + return err; +} + static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -1046,6 +1113,7 @@ int intel_memory_region_mock_selftests(void) SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), + SUBTEST(igt_mock_max_segment), }; struct intel_memory_region *mem; struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 6120d43fe504..3b3264311c91 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -3,6 +3,7 @@ * Copyright © 2019-2021 Intel Corporation */ +#include #include #include @@ -25,10 +26,11 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) { unsigned int flags; struct sg_table *pages; + int err; flags = I915_ALLOC_MIN_PAGE_SIZE; if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; + flags |= TTM_PL_FLAG_CONTIGUOUS; obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, obj->base.size, @@ -38,13 +40,17 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); if (IS_ERR(pages)) { - intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); - return PTR_ERR(pages); + err = PTR_ERR(pages); + goto err_free_resource; } __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); return 0; + +err_free_resource: + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); + return err; } static const struct drm_i915_gem_object_ops mock_region_obj_ops = { -- cgit v1.2.3 From 13c2ceb6addb6b14468e09b75832c98909eed8e7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:25:01 +0100 Subject: drm/i915/ttm: restore min_page_size behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now have bo->page_alignment which perfectly describes what we need if we have min page size restrictions for lmem. We can also drop the flag here, since this is the default behaviour for all objects. 
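Since bo->page_alignment is expressed in pages while the region's min_page_size is in bytes, the conversion is a simple shift. A sketch of the two call sites touched below, with a worked example: for an LMEM region with a 64K minimum page size and 4K CPU pages, the alignment passed to TTM is SZ_64K >> PAGE_SHIFT == 16 pages.

        /* Object creation: let TTM enforce the region's minimum page size. */
        ret = ttm_bo_init(&i915->bdev, i915_gem_to_ttm(obj), size,
                          bo_type, &i915_sys_placement,
                          mem->min_page_size >> PAGE_SHIFT,
                          true, NULL, NULL, i915_ttm_bo_destroy);

        /* Selftest allocation helper: same conversion for the mock bo. */
        mock_bo.page_alignment = mem->min_page_size >> PAGE_SHIFT;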
v2(Thomas): - bo->page_alignment is in page units Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-7-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/intel_memory_region.h | 3 +-- drivers/gpu/drm/i915/intel_region_ttm.c | 2 +- drivers/gpu/drm/i915/selftests/mock_region.c | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index b8739f3d3462..9366b18d1bc6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -672,9 +672,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); ret = ttm_bo_init(&i915->bdev, i915_gem_to_ttm(obj), size, - bo_type, &i915_sys_placement, 1, + bo_type, &i915_sys_placement, + mem->min_page_size >> PAGE_SHIFT, true, NULL, NULL, i915_ttm_bo_destroy); - if (!ret) obj->ttm.created = true; diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index b04fb22726d9..2be8433d373a 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -40,8 +40,7 @@ enum intel_region_id { #define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) #define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM) -#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) -#define I915_ALLOC_CONTIGUOUS BIT(1) +#define I915_ALLOC_CONTIGUOUS BIT(0) #define for_each_memory_region(mr, i915, id) \ for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \ diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 052253c81e98..d53d78dec2be 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -167,7 +167,7 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, int ret; mock_bo.base.size = size; - mock_bo.page_alignment = 1; + mock_bo.page_alignment = mem->min_page_size >> PAGE_SHIFT; place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 3b3264311c91..fa786dede608 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -28,7 +28,7 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) struct sg_table *pages; int err; - flags = I915_ALLOC_MIN_PAGE_SIZE; + flags = 0; if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) flags |= TTM_PL_FLAG_CONTIGUOUS; -- cgit v1.2.3 From 50331a7b50741035cc9335f863939d638b225e71 Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Thu, 17 Jun 2021 08:30:16 +0200 Subject: drm/i915/ttm: accelerated move implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invokes the pipelined page migration through blt, for i915_ttm_move requests of eviction and also obj clear. 
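The structure is to try the blitter-based move first and only fall back to the CPU memcpy path when the accelerated copy cannot be used (for example when there is no migration context, or the copy/clear request returns an error). A condensed sketch of the flow added to i915_ttm_move() in the diff below:

        ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
        if (ret) {
                /* Fall back to the existing memcpy-based move. */
                ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
        }
        ttm_bo_move_sync_cleanup(bo, dst_mem);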
Signed-off-by: Ramalingam C Reviewed-by: Thomas Hellström Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-11-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 88 ++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 9366b18d1bc6..df46535cca47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -15,6 +15,9 @@ #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_mman.h" +#include "gt/intel_migrate.h" +#include "gt/intel_engine_pm.h" + #define I915_PL_LMEM0 TTM_PL_PRIV #define I915_PL_SYSTEM TTM_PL_SYSTEM #define I915_PL_STOLEN TTM_PL_VRAM @@ -326,6 +329,62 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, return intel_region_ttm_resource_to_st(obj->mm.region, res); } +static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct ttm_resource_manager *src_man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct sg_table *src_st; + struct i915_request *rq; + int ret; + + if (!i915->gt.migrate.context) + return -EINVAL; + + if (!bo->ttm || !ttm_tt_is_populated(bo->ttm)) { + if (bo->type == ttm_bo_type_kernel) + return -EINVAL; + + if (bo->ttm && + !(bo->ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + return 0; + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, + dst_st->sgl, I915_CACHE_NONE, + dst_mem->mem_type >= I915_PL_LMEM0, + 0, &rq); + + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } else { + src_st = src_man->use_tt ? i915_ttm_tt_get_st(bo->ttm) : + obj->ttm.cached_io_st; + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_copy(i915->gt.migrate.context, + NULL, src_st->sgl, I915_CACHE_NONE, + bo->resource->mem_type >= I915_PL_LMEM0, + dst_st->sgl, I915_CACHE_NONE, + dst_mem->mem_type >= I915_PL_LMEM0, + &rq); + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } + + return ret; +} + static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_resource *dst_mem, @@ -376,19 +435,22 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st); - /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = dst_man->use_tt ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); - - src_iter = src_man->use_tt ? - ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); - - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + ret = i915_ttm_accel_move(bo, dst_mem, dst_st); + if (ret) { + /* If we start mapping GGTT, we can no longer use man::use_tt here. */ + dst_iter = dst_man->use_tt ? 
+ ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = src_man->use_tt ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_free_cached_io_st(obj); -- cgit v1.2.3 From b07a6483839a838dc7acff570174053dd544c039 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 18 Jun 2021 15:25:15 +0200 Subject: drm/i915/ttm: Fix incorrect assumptions about ttm_bo_validate() semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have assumed that if the current placement was not the requested placement, but instead one of the busy placements, a TTM move would have been triggered. That is not the case. So when we initially place LMEM objects in "Limbo", (that is system placement without any pages allocated), to be able to defer clearing objects until first get_pages(), the first get_pages() would happily keep objects in system memory if that is one of the allowed placements. And since we don't yet support i915 GEM system memory from TTM, everything breaks apart. So make sure we try the requested placement first, if no eviction is needed. If that fails, retry with all allowed placements also allowing evictions. Also make sure we handle TTM failure codes correctly. Also temporarily (until we support i915 GEM system on TTM), restrict allowed placements to the requested placement to avoid things falling apart should LMEM be full. Fixes: 38f28c0695c0 ("drm/i915/ttm: Calculate the object placement at get_pages time") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210618132515.163277-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 64 +++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index df46535cca47..c5deb8b7227c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -64,6 +64,33 @@ static struct ttm_placement i915_sys_placement = { .busy_placement = &sys_placement_flags, }; +static int i915_ttm_err_to_gem(int err) +{ + /* Fastpath */ + if (likely(!err)) + return 0; + + switch (err) { + case -EBUSY: + /* + * TTM likes to convert -EDEADLK to -EBUSY, and wants us to + * restart the operation, since we don't record the contending + * lock. We use -EAGAIN to restart. + */ + return -EAGAIN; + case -ENOSPC: + /* + * Memory type / region is full, and we can't evict. + * Except possibly system, that returns -ENOMEM; + */ + return -ENXIO; + default: + break; + } + + return err; +} + static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); static enum ttm_caching @@ -522,15 +549,46 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) struct sg_table *st; struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; struct ttm_placement placement; + int real_num_busy; int ret; GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); /* Move to the requested placement. 
*/ i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + + /* + * For now we support LMEM only with TTM. + * TODO: Remove with system support + */ + GEM_BUG_ON(requested.mem_type < I915_PL_LMEM0 || + busy[0].mem_type < I915_PL_LMEM0); + + /* First try only the requested placement. No eviction. */ + real_num_busy = fetch_and_zero(&placement.num_busy_placement); ret = ttm_bo_validate(bo, &placement, &ctx); - if (ret) - return ret == -ENOSPC ? -ENXIO : ret; + if (ret) { + ret = i915_ttm_err_to_gem(ret); + /* + * Anything that wants to restart the operation gets to + * do that. + */ + if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS || + ret == -EAGAIN) + return ret; + + /* TODO: Remove this when we support system as TTM. */ + real_num_busy = 1; + + /* + * If the initial attempt fails, allow all accepted placements, + * evicting if necessary. + */ + placement.num_busy_placement = real_num_busy; + ret = ttm_bo_validate(bo, &placement, &ctx); + if (ret) + return i915_ttm_err_to_gem(ret); + } /* Object either has a page vector or is an iomem object */ st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; @@ -741,5 +799,5 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, obj->ttm.created = true; /* i915 wants -ENXIO when out of memory region space. */ - return (ret == -ENOSPC) ? -ENXIO : ret; + return i915_ttm_err_to_gem(ret); } -- cgit v1.2.3 From 4bc2d5747eb00320eb3bcdf4cf603504e638c22f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 23 Jun 2021 15:34:11 +0100 Subject: drm/i915/ttm: fix static warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit warning: symbol 'i915_gem_ttm_obj_ops' was not declared. Should it be static? Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210623143411.293630-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c5deb8b7227c..cf5540c1537b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -730,7 +730,7 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) return drm_vma_node_offset_addr(&obj->base.vma_node); } -const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { +static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", .flags = I915_GEM_OBJECT_HAS_IOMEM, -- cgit v1.2.3 From 0ff375759f64a0b81853d9d9b4c5b5b4b06f4a2c Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 24 Jun 2021 10:42:38 +0200 Subject: drm/i915: Update object placement flags to be mutable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The object ops i915_GEM_OBJECT_HAS_IOMEM and the object I915_BO_ALLOC_STRUCT_PAGE flags are considered immutable by much of our code. Introduce a new mem_flags member to hold these and make sure checks for these flags being set are either done under the object lock or with pages properly pinned. The flags will change during migration under the object lock. 
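In practice this means callers may only test these bits while they hold the object lock or have the pages pinned; the lockdep assertions added below are there to catch violations. A caller-side sketch (use_iomem_mapping() is a made-up placeholder, not an existing helper):

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                return ret;
        /* Stable under the lock: the backing store cannot migrate here. */
        if (i915_gem_object_has_iomem(obj))
                use_iomem_mapping(obj);
        i915_gem_object_unlock(obj);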
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210624084240.270219-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 4 +-- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 22 +++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_mman.c | 12 ++++--- drivers/gpu/drm/i915/gem/i915_gem_object.c | 38 ++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 14 ++------ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 27 +++++++++------ drivers/gpu/drm/i915/gem/i915_gem_pages.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 7 ++-- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 4 +-- .../gpu/drm/i915/gem/selftests/huge_gem_object.c | 4 +-- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 5 ++- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 25 +++++++++----- drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c | 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 17 files changed, 123 insertions(+), 52 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ce6b664b10aa..13b217f75055 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -177,8 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index d539dffa1554..41d5182cd367 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -71,6 +71,28 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) mr->type == INTEL_MEMORY_STOLEN_LOCAL); } +/** + * __i915_gem_object_is_lmem - Whether the object is resident in + * lmem while in the fence signaling critical path. + * @obj: The object to check. + * + * This function is intended to be called from within the fence signaling + * path where the fence keeps the object from being migrated. For example + * during gpu reset or similar. + * + * Return: Whether the object is resident in lmem. 
+ */ +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + +#ifdef CONFIG_LOCKDEP + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true)); +#endif + return mr && (mr->type == INTEL_MEMORY_LOCAL || + mr->type == INTEL_MEMORY_STOLEN_LOCAL); +} + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index ea76fd11ccb0..27a611deba47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -21,6 +21,8 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 2fd155742bd2..6497a2dbdab9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -684,7 +684,7 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj, if (mmap_type != I915_MMAP_TYPE_GTT && !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_iomem(obj)) return -ENODEV; mmo = mmap_offset_attach(obj, mmap_type, file); @@ -708,7 +708,12 @@ __assign_mmap_offset_handle(struct drm_file *file, if (!obj) return -ENOENT; + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; err = __assign_mmap_offset(obj, mmap_type, offset, file); + i915_gem_object_unlock(obj); +out_put: i915_gem_object_put(obj); return err; } @@ -932,10 +937,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) return PTR_ERR(anon); } - vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; - - if (i915_gem_object_has_iomem(obj)) - vma->vm_flags |= VM_IO; + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; /* * We keep the ref on mmo->obj, not vm_file, but we require diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index cf18c430d51f..07e8ff9a8aae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -475,6 +475,44 @@ bool i915_gem_object_migratable(struct drm_i915_gem_object *obj) return obj->mm.n_placements > 1; } +/** + * i915_gem_object_has_struct_page - Whether the object is page-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the page backing may change under the caller. + * + * Return: True if page-backed, false otherwise. + */ +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) +{ +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE; +} + +/** + * i915_gem_object_has_iomem - Whether the object is iomem-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the iomem backing may change under the caller. + * + * Return: True if iomem-backed, false otherwise. 
+ */ +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) +{ +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_IOMEM; +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7bf4dd46d8d2..ea3224a480c4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -148,7 +148,7 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) /* * If more than one potential simultaneous locker, assert held. */ -static inline void assert_object_held_shared(struct drm_i915_gem_object *obj) +static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj) { /* * Note mm list lookup is protected by @@ -266,17 +266,9 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj, return obj->ops->flags & flags; } -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->flags & I915_BO_ALLOC_STRUCT_PAGE; -} +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj); -static inline bool -i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); -} +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 3a2d9ecf8e03..441f913c87e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -33,10 +33,9 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) -#define I915_GEM_OBJECT_IS_PROXY BIT(3) -#define I915_GEM_OBJECT_NO_MMAP BIT(4) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_NO_MMAP BIT(3) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -201,17 +200,25 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_STRUCT_PAGE BIT(2) -#define I915_BO_ALLOC_CPU_CLEAR BIT(3) -#define I915_BO_ALLOC_USER BIT(4) +#define I915_BO_ALLOC_CPU_CLEAR BIT(2) +#define I915_BO_ALLOC_USER BIT(3) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ - I915_BO_ALLOC_STRUCT_PAGE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER) -#define I915_BO_READONLY BIT(5) -#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not release! */ +#define I915_BO_READONLY BIT(4) +#define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */ + /** + * @mem_flags - Mutable placement-related flags + * + * These are flags that indicate specifics of the memory region + * the object is currently in. As such they are only stable + * either under the object lock or if the object is pinned. 
+ */ + unsigned int mem_flags; +#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */ +#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */ /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 086005c1c7ea..f2f850e31b8e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -351,7 +351,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, int err; if (!i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_iomem(obj)) return ERR_PTR(-ENXIO); assert_object_held(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index be72ad0634ba..7986612f48fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -76,7 +76,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); /* We're no longer struct page backed */ - obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE; + obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; __i915_gem_object_set_pages(obj, st, sg->length); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d16c4462fda..7aa1c95c7b7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -444,7 +444,7 @@ shmem_pread(struct drm_i915_gem_object *obj, static void shmem_release(struct drm_i915_gem_object *obj) { - if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE) + if (i915_gem_object_has_struct_page(obj)) i915_gem_object_release_memory_region(obj); fput(obj->base.filp); @@ -513,9 +513,8 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index cf5540c1537b..e41bb9d6a491 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -732,7 +732,6 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", - .flags = I915_GEM_OBJECT_HAS_IOMEM, .get_pages = i915_ttm_get_pages, .put_pages = i915_ttm_put_pages, @@ -777,6 +776,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_gem_object_make_unshrinkable(obj); obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; + obj->mem_flags |= I915_BO_FLAG_IOMEM; i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 4b0acc7eaa27..56edfeff8c02 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ 
-510,8 +510,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0); + obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 0c8ecfdf5405..f963b8e1e37b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -114,8 +114,8 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &huge_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dadd485bc52f..ccc67ed1a84b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -167,9 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 44b5de06ce64..607b7d2d4c29 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -831,16 +831,19 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { + bool no_map; + if (type == I915_MMAP_TYPE_GTT && !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) return false; - if (type != I915_MMAP_TYPE_GTT && - !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) - return false; + i915_gem_object_lock(obj, NULL); + no_map = (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_has_iomem(obj)); + i915_gem_object_unlock(obj); - return true; + return !no_map; } static void object_set_placements(struct drm_i915_gem_object *obj, @@ -988,10 +991,16 @@ static const char *repr_mmap_type(enum i915_mmap_type type) } } -static bool can_access(const struct drm_i915_gem_object *obj) +static bool can_access(struct drm_i915_gem_object *obj) { - return i915_gem_object_has_struct_page(obj) || - i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); + bool access; + + i915_gem_object_lock(obj, NULL); + access = i915_gem_object_has_struct_page(obj) || + i915_gem_object_has_iomem(obj); + i915_gem_object_unlock(obj); + + return access; } static int __igt_mmap_access(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c 
index 3a6ce87f8b52..d43d8dae0f69 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,13 +25,14 @@ static int mock_phys_object(void *arg) goto out; } + i915_gem_object_lock(obj, NULL); if (!i915_gem_object_has_struct_page(obj)) { + i915_gem_object_unlock(obj); err = -EINVAL; pr_err("shmem has no struct page\n"); goto out_obj; } - i915_gem_object_lock(obj, NULL); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); i915_gem_object_unlock(obj); if (err) { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index cb182c6d265a..a2c58b54a592 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1039,7 +1039,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, if (ret) break; } - } else if (i915_gem_object_is_lmem(vma->obj)) { + } else if (__i915_gem_object_is_lmem(vma->obj)) { struct intel_memory_region *mem = vma->obj->mm.region; dma_addr_t dma; -- cgit v1.2.3 From 3c2b8f326e7f73dd10ae422dc65603a858f6c6b4 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 24 Jun 2021 10:42:39 +0200 Subject: drm/i915/ttm: Adjust gem flags and caching settings after a move MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a TTM move or object init we need to update the i915 gem flags and caching settings to reflect the new placement. Currently caching settings are not changed during the lifetime of an object, although that might change moving forward if we run into performance issues or issues with WC system page allocations. Also introduce gpu_binds_iomem() and cpu_maps_iomem() to clean up the various ways we previously used to detect this. Finally, initialize the TTM object reserved to be able to update flags and caching before anyone else gets hold of the object. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210624084240.270219-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 144 ++++++++++++++++++++++++-------- 1 file changed, 108 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e41bb9d6a491..47cd3ee2e262 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -91,6 +91,26 @@ static int i915_ttm_err_to_gem(int err) return err; } +static bool gpu_binds_iomem(struct ttm_resource *mem) +{ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static bool cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && + ttm->caching == ttm_cached) ? 
I915_CACHE_LLC : + I915_CACHE_NONE; +} + static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); static enum ttm_caching @@ -248,6 +268,35 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) obj->ttm.cached_io_st = NULL; } +static void +i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + static void i915_ttm_purge(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); @@ -263,8 +312,10 @@ static void i915_ttm_purge(struct drm_i915_gem_object *obj) /* TTM's purge interface. Note that we might be reentering. */ ret = ttm_bo_validate(bo, &place, &ctx); - if (!ret) { + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); i915_ttm_free_cached_io_st(obj); obj->mm.madv = __I915_MADV_PURGED; } @@ -347,12 +398,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - struct ttm_resource_manager *man = - ttm_manager_type(bo->bdev, res->mem_type); - if (man->use_tt) + if (!gpu_binds_iomem(res)) return i915_ttm_tt_get_st(bo->ttm); + /* + * If CPU mapping differs, we need to add the ttm_tt pages to + * the resulting st. Might make sense for GGTT. + */ + GEM_WARN_ON(!cpu_maps_iomem(res)); return intel_region_ttm_resource_to_st(obj->mm.region, res); } @@ -367,23 +421,25 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct sg_table *src_st; struct i915_request *rq; + struct ttm_tt *ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; int ret; if (!i915->gt.migrate.context) return -EINVAL; - if (!bo->ttm || !ttm_tt_is_populated(bo->ttm)) { + dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); + if (!ttm || !ttm_tt_is_populated(ttm)) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; - if (bo->ttm && - !(bo->ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) return 0; intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, - dst_st->sgl, I915_CACHE_NONE, - dst_mem->mem_type >= I915_PL_LMEM0, + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), 0, &rq); if (!ret && rq) { @@ -392,15 +448,16 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, } intel_engine_pm_put(i915->gt.migrate.context->engine); } else { - src_st = src_man->use_tt ? i915_ttm_tt_get_st(bo->ttm) : - obj->ttm.cached_io_st; + src_st = src_man->use_tt ? 
i915_ttm_tt_get_st(ttm) : + obj->ttm.cached_io_st; + src_level = i915_ttm_cache_level(i915, bo->resource, ttm); intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_copy(i915->gt.migrate.context, - NULL, src_st->sgl, I915_CACHE_NONE, - bo->resource->mem_type >= I915_PL_LMEM0, - dst_st->sgl, I915_CACHE_NONE, - dst_mem->mem_type >= I915_PL_LMEM0, + NULL, src_st->sgl, src_level, + gpu_binds_iomem(bo->resource), + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), &rq); if (!ret && rq) { i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); @@ -420,8 +477,6 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct ttm_resource_manager *dst_man = ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct ttm_resource_manager *src_man = - ttm_manager_type(bo->bdev, bo->resource->mem_type); struct intel_memory_region *dst_reg, *src_reg; union { struct ttm_kmap_iter_tt tt; @@ -465,12 +520,12 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, ret = i915_ttm_accel_move(bo, dst_mem, dst_st); if (ret) { /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = dst_man->use_tt ? + dst_iter = !cpu_maps_iomem(dst_mem) ? ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, dst_st, dst_reg->region.start); - src_iter = src_man->use_tt ? + src_iter = !cpu_maps_iomem(bo->resource) ? ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, obj->ttm.cached_io_st, @@ -478,21 +533,24 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); } + /* Below dst_mem becomes bo->resource. */ ttm_bo_move_sync_cleanup(bo, dst_mem); + i915_ttm_adjust_domains_after_move(obj); i915_ttm_free_cached_io_st(obj); - if (!dst_man->use_tt) { + if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { obj->ttm.cached_io_st = dst_st; obj->ttm.get_io_page.sg_pos = dst_st->sgl; obj->ttm.get_io_page.sg_idx = 0; } + i915_ttm_adjust_gem_after_move(obj); return 0; } static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { - if (mem->mem_type < I915_PL_LMEM0) + if (!cpu_maps_iomem(mem)) return 0; mem->bus.caching = ttm_write_combined; @@ -590,6 +648,16 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) return i915_ttm_err_to_gem(ret); } + i915_ttm_adjust_lru(obj); + if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + } + /* Object either has a page vector or is an iomem object */ st = bo->ttm ? 
i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; if (IS_ERR(st)) @@ -597,8 +665,6 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); - i915_ttm_adjust_lru(obj); - return ret; } @@ -768,6 +834,10 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, { static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; enum ttm_bo_type bo_type; int ret; @@ -775,14 +845,13 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); i915_gem_object_init_memory_region(obj, mem); i915_gem_object_make_unshrinkable(obj); - obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; - obj->mem_flags |= I915_BO_FLAG_IOMEM; - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : ttm_bo_type_kernel; + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -790,14 +859,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * Similarly, in delayed_destroy, we can't call ttm_bo_put() * until successful initialization. */ - obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); - ret = ttm_bo_init(&i915->bdev, i915_gem_to_ttm(obj), size, - bo_type, &i915_sys_placement, - mem->min_page_size >> PAGE_SHIFT, - true, NULL, NULL, i915_ttm_bo_destroy); - if (!ret) - obj->ttm.created = true; - - /* i915 wants -ENXIO when out of memory region space. */ - return i915_ttm_err_to_gem(ret); + ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + bo_type, &i915_sys_placement, + mem->min_page_size >> PAGE_SHIFT, + &ctx, NULL, NULL, i915_ttm_bo_destroy); + if (ret) + return i915_ttm_err_to_gem(ret); + + obj->ttm.created = true; + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + i915_gem_object_unlock(obj); + + return 0; } -- cgit v1.2.3 From 32b7cf51a441270c62ebaa146c9431e6f155d901 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 24 Jun 2021 10:42:40 +0200 Subject: drm/i915/ttm: Use TTM for system memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For discrete, use TTM for both cached and WC system memory. That means we currently rely on the TTM memory accounting / shrinker. For cached system memory we should consider remaining shmem-backed, which can be implemented from our ttm_tt_populate callback. We can then also reuse our own very elaborate shrinker for that memory. If an object is evicted to a gem allowable region, we will now consider the object migrated, and we flip the gem region and move the object to a different region list. Since we are now changing gem regions, we can't any longer rely on the CONTIGUOUS flag being set based on the region min page size, so remove that flag update. If we want to reintroduce it, we need to put it in the mutable flags. 
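Roughly, the region flip described above is the check sketched here (a condensed copy of the i915_ttm_adjust_gem_after_move() hunk in this patch): if the new TTM memory type matches one of the object's allowable placements, the GEM region is swapped and the object moves to that region's list; otherwise the object is only considered evicted.

	/* Sketch: flip the GEM region only when the move lands in an allowable placement */
	if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
		for (i = 0; i < obj->mm.n_placements; ++i) {
			struct intel_memory_region *mr = obj->mm.placements[i];

			if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
			    mr != obj->mm.region) {
				i915_gem_object_release_memory_region(obj);
				i915_gem_object_init_memory_region(obj, mr);
				break;
			}
		}
	}
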
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210624084240.270219-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_region.c | 4 --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 52 ++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 3 -- drivers/gpu/drm/i915/intel_memory_region.c | 7 +++- drivers/gpu/drm/i915/intel_memory_region.h | 8 +++++ 6 files changed, 58 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index d1f1840540dd..4925563018b4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -13,11 +13,7 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, { obj->mm.region = intel_memory_region_get(mem); - if (obj->base.size <= mem->min_page_size) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - mutex_lock(&mem->objects.lock); - list_add(&obj->mm.region_link, &mem->objects.list); mutex_unlock(&mem->objects.lock); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 7aa1c95c7b7d..e9ac913b923a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -302,6 +302,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ struct pagevec pvec; struct page *page; + GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -560,6 +561,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, resource_size_t offset; int err; + GEM_WARN_ON(IS_DGFX(dev_priv)); obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 47cd3ee2e262..c39d982c4fa6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -286,6 +286,26 @@ static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); @@ -615,13 +635,6 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) /* Move to the requested placement. */ i915_ttm_placement_from_obj(obj, &requested, busy, &placement); - /* - * For now we support LMEM only with TTM. - * TODO: Remove with system support - */ - GEM_BUG_ON(requested.mem_type < I915_PL_LMEM0 || - busy[0].mem_type < I915_PL_LMEM0); - /* First try only the requested placement. No eviction. 
*/ real_num_busy = fetch_and_zero(&placement.num_busy_placement); ret = ttm_bo_validate(bo, &placement, &ctx); @@ -635,9 +648,6 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) ret == -EAGAIN) return ret; - /* TODO: Remove this when we support system as TTM. */ - real_num_busy = 1; - /* * If the initial attempt fails, allow all accepted placements, * evicting if necessary. @@ -873,3 +883,25 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, return 0; } + +static const struct intel_memory_region_ops ttm_system_region_ops = { + .init_object = __i915_gem_ttm_object_init, +}; + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance) +{ + struct intel_memory_region *mr; + + mr = intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + type, instance, + &ttm_system_region_ops); + if (IS_ERR(mr)) + return mr; + + intel_memory_region_set_name(mr, "system-ttm"); + return mr; +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 01e11fe38642..bfbfbae57573 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1751,9 +1751,6 @@ void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); -struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, - u16 type, u16 instance); - static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { /* diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index df59f884d37c..779eb2fa90b6 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -173,7 +173,12 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) instance = intel_region_map[i].instance; switch (type) { case INTEL_MEMORY_SYSTEM: - mem = i915_gem_shmem_setup(i915, type, instance); + if (IS_DGFX(i915)) + mem = i915_gem_ttm_system_setup(i915, type, + instance); + else + mem = i915_gem_shmem_setup(i915, type, + instance); break; case INTEL_MEMORY_STOLEN_LOCAL: mem = i915_gem_stolen_lmem_setup(i915, type, instance); diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 2be8433d373a..b1b9e461d53b 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -125,4 +125,12 @@ intel_memory_region_set_name(struct intel_memory_region *mem, int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance); +struct intel_memory_region * +i915_gem_shmem_setup(struct drm_i915_private *i915, + u16 type, u16 instance); + #endif -- cgit v1.2.3 From b6e913e19c54eddd6a4d637969f5c079effb74c6 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 29 Jun 2021 17:12:01 +0200 Subject: drm/i915/gem: Implement object migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce an interface to migrate objects between regions. This is primarily intended to migrate objects to LMEM for display and to SYSTEM for dma-buf, but might be reused in one form or another for performance-based migration. v2: - Verify that the memory region given as an id really exists. 
(Reported by Matthew Auld) - Call i915_gem_object_{init,release}_memory_region() when switching region to handle also switching region lists. (Reported by Matthew Auld) v3: - Fix i915_gem_object_can_migrate() to return true if object is already in the correct region, even if the object ops doesn't have a migrate() callback. - Update typo in commit message. - Fix kerneldoc of i915_gem_object_wait_migration(). v4: - Improve documentation (Suggested by Mattew Auld and Michael Ruhl) - Always assume TTM migration hits a TTM move and unsets the pages through move_notify. (Reported by Matthew Auld) - Add a dma_fence_might_wait() annotation to i915_gem_object_wait_migration() (Suggested by Daniel Vetter) v5: - Re-add might_sleep() instead of __dma_fence_might_wait(), Sent v4 with the wrong version, didn't compile and __dma_fence_might_wait() is not exported. - Added an R-B. Reported-by: kernel test robot Signed-off-by: Thomas Hellström Reviewed-by: Michael J. Ruhl Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210629151203.209465-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 112 +++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 +++ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 77 +++++++++++++--- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 19 ++++ 5 files changed, 217 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 07e8ff9a8aae..225b77fb4314 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -513,6 +513,118 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) return obj->mem_flags & I915_BO_FLAG_IOMEM; } +/** + * i915_gem_object_can_migrate - Whether an object likely can be migrated + * + * @obj: The object to migrate + * @id: The region intended to migrate to + * + * Check whether the object backend supports migration to the + * given region. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate objects and might be slightly less permissive + * than i915_gem_object_migrate() when it comes to objects with the + * I915_BO_ALLOC_USER flag set. + * + * Return: true if migration is possible, false otherwise. + */ +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int num_allowed = obj->mm.n_placements; + struct intel_memory_region *mr; + unsigned int i; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + + mr = i915->mm.regions[id]; + if (!mr) + return false; + + if (obj->mm.region == mr) + return true; + + if (!i915_gem_object_evictable(obj)) + return false; + + if (!obj->ops->migrate) + return false; + + if (!(obj->flags & I915_BO_ALLOC_USER)) + return true; + + if (num_allowed == 0) + return false; + + for (i = 0; i < num_allowed; ++i) { + if (mr == obj->mm.placements[i]) + return true; + } + + return false; +} + +/** + * i915_gem_object_migrate - Migrate an object to the desired region id + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. 
If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. + * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * This function is a bit more permissive than i915_gem_object_can_migrate() + * to allow for migrating objects where the caller knows exactly what is + * happening. For example within selftests. More specifically this + * function allows migrating I915_BO_ALLOC_USER objects to regions + * that are not in the list of allowable regions. + * + * Note: the @ww parameter is not used yet, but included to make sure + * callers put some effort into obtaining a valid ww ctx if one is + * available. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. + */ +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + assert_object_held(obj); + + mr = i915->mm.regions[id]; + GEM_BUG_ON(!mr); + + if (obj->mm.region == mr) + return 0; + + if (!i915_gem_object_evictable(obj)) + return -EBUSY; + + if (!obj->ops->migrate) + return -EOPNOTSUPP; + + return obj->ops->migrate(obj, mr); +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index ea3224a480c4..8cbd7a5334e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -17,6 +17,8 @@ #include "i915_gem_ww.h" #include "i915_vma_types.h" +enum intel_region_id; + /* * XXX: There is a prevalence of the assumption that we fit the * object's page count inside a 32bit _signed_ variable. 
Let's document @@ -597,6 +599,16 @@ bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj); +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id); + +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id); + +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 441f913c87e6..ef3de2ae9723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -18,6 +18,7 @@ struct drm_i915_gem_object; struct intel_fronbuffer; +struct intel_memory_region; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -77,6 +78,14 @@ struct drm_i915_gem_object_ops { * delayed_free - Override the default delayed free implementation */ void (*delayed_free)(struct drm_i915_gem_object *obj); + + /** + * migrate - Migrate object to a different region either for + * pinning or for as long as the object lock is held. + */ + int (*migrate)(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr); + void (*release)(struct drm_i915_gem_object *obj); const struct vm_operations_struct *mmap_ops; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c39d982c4fa6..521ab740001a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -617,7 +617,8 @@ struct ttm_device_funcs *i915_ttm_driver(void) return &i915_ttm_bo_driver; } -static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) +static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); struct ttm_operation_ctx ctx = { @@ -625,19 +626,12 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) .no_wait_gpu = false, }; struct sg_table *st; - struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; - struct ttm_placement placement; int real_num_busy; int ret; - GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); - - /* Move to the requested placement. */ - i915_ttm_placement_from_obj(obj, &requested, busy, &placement); - /* First try only the requested placement. No eviction. */ - real_num_busy = fetch_and_zero(&placement.num_busy_placement); - ret = ttm_bo_validate(bo, &placement, &ctx); + real_num_busy = fetch_and_zero(&placement->num_busy_placement); + ret = ttm_bo_validate(bo, placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -652,8 +646,8 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement.num_busy_placement = real_num_busy; - ret = ttm_bo_validate(bo, &placement, &ctx); + placement->num_busy_placement = real_num_busy; + ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); } @@ -668,6 +662,7 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) i915_ttm_adjust_gem_after_move(obj); } + GEM_WARN_ON(obj->mm.pages); /* Object either has a page vector or is an iomem object */ st = bo->ttm ? 
i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; if (IS_ERR(st)) @@ -678,6 +673,63 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) return ret; } +static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) +{ + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; + + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); + + /* Move to the requested placement. */ + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + + return __i915_ttm_get_pages(obj, &placement); +} + +/** + * DOC: Migration vs eviction + * + * GEM migration may not be the same as TTM migration / eviction. If + * the TTM core decides to evict an object it may be evicted to a + * TTM memory type that is not in the object's allowable GEM regions, or + * in fact theoretically to a TTM memory type that doesn't correspond to + * a GEM memory region. In that case the object's GEM region is not + * updated, and the data is migrated back to the GEM region at + * get_pages time. TTM may however set up CPU ptes to the object even + * when it is evicted. + * Gem forced migration using the i915_ttm_migrate() op, is allowed even + * to regions that are not in the object's list of allowable placements. + */ +static int i915_ttm_migrate(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr) +{ + struct ttm_place requested; + struct ttm_placement placement; + int ret; + + i915_ttm_place_from_region(mr, &requested, obj->flags); + placement.num_placement = 1; + placement.num_busy_placement = 1; + placement.placement = &requested; + placement.busy_placement = &requested; + + ret = __i915_ttm_get_pages(obj, &placement); + if (ret) + return ret; + + /* + * Reinitialize the region bindings. This is primarily + * required for objects where the new region is not in + * its allowable placements. + */ + if (obj->mm.region != mr) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + } + + return 0; +} + static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st) { @@ -814,6 +866,7 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .truncate = i915_ttm_purge, .adjust_lru = i915_ttm_adjust_lru, .delayed_free = i915_ttm_delayed_free, + .migrate = i915_ttm_migrate, .mmap_offset = i915_ttm_mmap_offset, .mmap_ops = &vm_ops_ttm, }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1070d3afdce7..f909aaa09d9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -290,3 +290,22 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); return ret; } + +/** + * i915_gem_object_wait_migration - Sync an accelerated migration operation + * @obj: The migrating object. + * @flags: waiting flags. Currently supports only I915_WAIT_INTERRUPTIBLE. + * + * Wait for any pending async migration operation on the object, + * whether it's explicitly (i915_gem_object_migrate()) or implicitly + * (swapin, initial clearing) initiated. + * + * Return: 0 if successful, -ERESTARTSYS if a signal was hit during waiting. + */ +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + might_sleep(); + /* NOP for now. 
*/ + return 0; +} -- cgit v1.2.3 From d22632c83b948e4f7a3d4202a884be2409098cc2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Jun 2021 11:38:23 +0100 Subject: drm/i915: support forcing the page size with lmem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some specialised objects we might need something larger than the regions min_page_size due to some hw restriction, and slightly more hairy is needing something smaller with the guarantee that such objects will never be inserted into any GTT, which is the case for the paging structures. This also fixes how we setup the BO page_alignment, if we later migrate the object somewhere else. For example if the placements are {SMEM, LMEM}, then we might get this wrong. Pushing the min_page_size behaviour into the manager should fix this. v2(Thomas): push the default page size behaviour into buddy_man, and let the user override it with the page-alignment, which looks cleaner v3: rebase on ttm sys changes Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210625103824.558481-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 33 +++++++++++- drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 5 ++ drivers/gpu/drm/i915/gem/i915_gem_region.c | 13 ++++- drivers/gpu/drm/i915/gem/i915_gem_region.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 6 ++- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 1 + drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 3 +- .../gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 +-- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 14 ++++- drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 2 +- drivers/gpu/drm/i915/intel_memory_region.h | 1 + drivers/gpu/drm/i915/intel_region_ttm.c | 4 +- .../gpu/drm/i915/selftests/intel_memory_region.c | 63 ++++++++++++++++++++-- drivers/gpu/drm/i915/selftests/mock_region.c | 1 + 18 files changed, 144 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 93bf63bbaff1..51f92e4b1a69 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -90,7 +90,7 @@ i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) */ flags = I915_BO_ALLOC_USER; - ret = mr->ops->init_object(mr, obj, size, flags); + ret = mr->ops->init_object(mr, obj, size, 0, flags); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index be1d122574af..eb345305dc52 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -72,11 +72,42 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) mr->type == INTEL_MEMORY_STOLEN_LOCAL); } +/** + * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the + * minimum page size for the backing pages. + * @i915: The i915 instance. + * @size: The size in bytes for the object. Note that we need to round the size + * up depending on the @page_size. The final object size can be fished out from + * the drm GEM object. + * @page_size: The requested minimum page size in bytes for this object. 
This is + * useful if we need something bigger than the regions min_page_size due to some + * hw restriction, or in some very specialised cases where it needs to be + * smaller, where the internal fragmentation cost is too great when rounding up + * the object size. + * @flags: The optional BO allocation flags. + * + * Note that this interface assumes you know what you are doing when forcing the + * @page_size. If this is smaller than the regions min_page_size then it can + * never be inserted into any GTT, otherwise it might lead to undefined + * behaviour. + * + * Return: The object pointer, which might be an ERR_PTR in the case of failure. + */ +struct drm_i915_gem_object * +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, page_size, flags); +} + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], - size, flags); + size, 0, flags); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 27a611deba47..4ee81fc66302 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -23,6 +23,11 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +struct drm_i915_gem_object * +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 4925563018b4..1f557b2178ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -32,9 +32,11 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct drm_i915_gem_object *obj; + resource_size_t default_page_size; int err; /* @@ -48,7 +50,14 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!mem) return ERR_PTR(-ENODEV); - size = round_up(size, mem->min_page_size); + default_page_size = mem->min_page_size; + if (page_size) + default_page_size = page_size; + + GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); + GEM_BUG_ON(default_page_size < PAGE_SIZE); + + size = round_up(size, default_page_size); GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); @@ -60,7 +69,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!obj) return ERR_PTR(-ENOMEM); - err = mem->ops->init_object(mem, obj, size, flags); + err = mem->ops->init_object(mem, obj, size, page_size, flags); if (err) goto err_object_free; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index 84fcb3297400..1008e580a89a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -19,6 +19,7 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * 
i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index e9ac913b923a..6a04cce188fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -490,6 +490,7 @@ static int __create_shmem(struct drm_i915_private *i915, static int shmem_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; @@ -548,7 +549,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], - size, 0); + size, 0, 0); } /* Allocate a new GEM object and fill it with the supplied data */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b0c3a7dc60d1..90708de27684 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -670,6 +670,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct drm_i915_private *i915 = mem->i915; @@ -708,7 +709,7 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.stolen_region, size, 0); + return i915_gem_object_create_region(i915->mm.stolen_region, size, 0, 0); } static int init_stolen_smem(struct intel_memory_region *mem) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 521ab740001a..6589411396d3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -893,6 +893,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) int __i915_gem_ttm_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; @@ -915,6 +916,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + /* Forcing the page size is kernel internal only */ + GEM_BUG_ON(page_size && obj->mm.n_placements); + /* * If this function fails, it will call the destructor, but * our caller still owns the object. 
So no freeing in the
@@ -924,7 +928,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
 	 */
 	ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
 				   bo_type, &i915_sys_placement,
-				   mem->min_page_size >> PAGE_SHIFT,
+				   page_size >> PAGE_SHIFT,
 				   &ctx, NULL, NULL, i915_ttm_bo_destroy);
 	if (ret)
 		return i915_ttm_err_to_gem(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index b8d3dcbb50df..40927f67b6d9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -44,5 +44,6 @@ i915_ttm_to_gem(struct ttm_buffer_object *bo)
 int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
 			       struct drm_i915_gem_object *obj,
 			       resource_size_t size,
+			       resource_size_t page_size,
 			       unsigned int flags);
 #endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index ccc67ed1a84b..a094f3ce1a90 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -496,7 +496,8 @@ static int igt_mock_memory_region_huge_pages(void *arg)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(flags); ++i) {
-		obj = i915_gem_object_create_region(mem, page_size,
+		obj = i915_gem_object_create_region(mem,
+						    page_size, page_size,
 						    flags[i]);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index ced6e3a814a2..0b7144d2991c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -48,7 +48,7 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src,
 	GEM_BUG_ON(!src_mr);
 
 	/* Switch object backing-store on create */
-	obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0);
+	obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 607b7d2d4c29..1da8bd675e54 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -958,7 +958,7 @@ static int igt_mmap(void *arg)
 		struct drm_i915_gem_object *obj;
 		int err;
 
-		obj = i915_gem_object_create_region(mr, sizes[i], I915_BO_ALLOC_USER);
+		obj = i915_gem_object_create_region(mr, sizes[i], 0, I915_BO_ALLOC_USER);
 		if (obj == ERR_PTR(-ENODEV))
 			continue;
 
@@ -1084,7 +1084,7 @@ static int igt_mmap_access(void *arg)
 		struct drm_i915_gem_object *obj;
 		int err;
 
-		obj = i915_gem_object_create_region(mr, PAGE_SIZE, I915_BO_ALLOC_USER);
+		obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER);
 		if (obj == ERR_PTR(-ENODEV))
 			continue;
 
@@ -1229,7 +1229,7 @@ static int igt_mmap_gpu(void *arg)
 		struct drm_i915_gem_object *obj;
 		int err;
 
-		obj = i915_gem_object_create_region(mr, PAGE_SIZE, I915_BO_ALLOC_USER);
+		obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER);
 		if (obj == ERR_PTR(-ENODEV))
 			continue;
 
@@ -1384,7 +1384,7 @@ static int igt_mmap_revoke(void *arg)
 		struct drm_i915_gem_object *obj;
 		int err;
 
-		obj = i915_gem_object_create_region(mr, PAGE_SIZE, I915_BO_ALLOC_USER);
+		obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER);
 		if (obj == ERR_PTR(-ENODEV))
 			continue;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index fc7ad5c035b8..6877362f6b85 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -18,6 +18,7 @@ struct i915_ttm_buddy_manager {
 	struct i915_buddy_mm mm;
 	struct list_head reserved;
 	struct mutex lock;
+	u64 default_page_size;
 };
 
 static struct i915_ttm_buddy_manager *
@@ -53,7 +54,10 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	GEM_BUG_ON(!bman_res->base.num_pages);
 	size = bman_res->base.num_pages << PAGE_SHIFT;
 
-	min_page_size = bo->page_alignment << PAGE_SHIFT;
+	min_page_size = bman->default_page_size;
+	if (bo->page_alignment)
+		min_page_size = bo->page_alignment << PAGE_SHIFT;
+
 	GEM_BUG_ON(min_page_size < mm->chunk_size);
 	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
@@ -134,6 +138,9 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = {
  * @type: Memory type we want to manage
  * @use_tt: Set use_tt for the manager
  * @size: The size in bytes to manage
+ * @default_page_size: The default minimum page size in bytes for allocations,
+ * this must be at least as large as @chunk_size, and can be overridden by
+ * setting the BO page_alignment, to be larger or smaller as needed.
  * @chunk_size: The minimum page size in bytes for our allocations i.e
  * order-zero
  *
@@ -154,7 +161,8 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = {
  */
 int i915_ttm_buddy_man_init(struct ttm_device *bdev,
 			    unsigned int type, bool use_tt,
-			    u64 size, u64 chunk_size)
+			    u64 size, u64 default_page_size,
+			    u64 chunk_size)
 {
 	struct ttm_resource_manager *man;
 	struct i915_ttm_buddy_manager *bman;
@@ -170,6 +178,8 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev,
 	mutex_init(&bman->lock);
 	INIT_LIST_HEAD(&bman->reserved);
 
+	GEM_BUG_ON(default_page_size < chunk_size);
+	bman->default_page_size = default_page_size;
 	man = &bman->manager;
 	man->use_tt = use_tt;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
index 26026213e20a..0722d33f3e14 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
@@ -46,7 +46,7 @@ to_ttm_buddy_resource(struct ttm_resource *res)
 
 int i915_ttm_buddy_man_init(struct ttm_device *bdev,
 			    unsigned type, bool use_tt,
-			    u64 size, u64 chunk_size);
+			    u64 size, u64 default_page_size, u64 chunk_size);
 int i915_ttm_buddy_man_fini(struct ttm_device *bdev,
 			    unsigned int type);
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index b1b9e461d53b..1f2b96efa69d 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -55,6 +55,7 @@ struct intel_memory_region_ops {
 	int (*init_object)(struct intel_memory_region *mem,
 			   struct drm_i915_gem_object *obj,
 			   resource_size_t size,
+			   resource_size_t page_size,
 			   unsigned int flags);
 };
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 4cd10f364e84..98c7339bf8ba 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -86,7 +86,8 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 	int ret;
 
 	ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
-				      resource_size(&mem->region), PAGE_SIZE);
+				      resource_size(&mem->region),
+				      mem->min_page_size, PAGE_SIZE);
 	if (ret)
 		return ret;
 
@@ -167,7 +168,6 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
 	int ret;
 
 	mock_bo.base.size = size;
-	mock_bo.page_alignment = mem->min_page_size >> PAGE_SHIFT;
 	place.flags = flags;
 
 	ret = man->func->alloc(man, &mock_bo, &place, &res);
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index ecc3b9e6c22b..1aaccb9841a0 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -68,7 +68,7 @@ static int igt_mock_fill(void *arg)
 		resource_size_t size = page_num * page_size;
 		struct drm_i915_gem_object *obj;
 
-		obj = i915_gem_object_create_region(mem, size, 0);
+		obj = i915_gem_object_create_region(mem, size, 0, 0);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
 			break;
@@ -110,7 +110,7 @@ igt_object_create(struct intel_memory_region *mem,
 	struct drm_i915_gem_object *obj;
 	int err;
 
-	obj = i915_gem_object_create_region(mem, size, flags);
+	obj = i915_gem_object_create_region(mem, size, 0, flags);
 	if (IS_ERR(obj))
 		return obj;
 
@@ -647,6 +647,62 @@ out_put:
 	return err;
 }
 
+static int igt_lmem_create_with_ps(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	int err = 0;
+	u32 ps;
+
+	for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) {
+		struct drm_i915_gem_object *obj;
+		dma_addr_t daddr;
+
+		obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			if (err == -ENXIO || err == -E2BIG) {
+				pr_info("%s not enough lmem for ps(%u) err=%d\n",
+					__func__, ps, err);
+				err = 0;
+			}
+
+			break;
+		}
+
+		if (obj->base.size != ps) {
+			pr_err("%s size(%zu) != ps(%u)\n",
+			       __func__, obj->base.size, ps);
+			err = -EINVAL;
+			goto out_put;
+		}
+
+		i915_gem_object_lock(obj, NULL);
+		err = i915_gem_object_pin_pages(obj);
+		if (err)
+			goto out_put;
+
+		daddr = i915_gem_object_get_dma_address(obj, 0);
+		if (!IS_ALIGNED(daddr, ps)) {
+			pr_err("%s daddr(%pa) not aligned with ps(%u)\n",
+			       __func__, &daddr, ps);
+			err = -EINVAL;
+			goto out_unpin;
+		}
+
+out_unpin:
+		i915_gem_object_unpin_pages(obj);
+		__i915_gem_object_put_pages(obj);
+out_put:
+		i915_gem_object_unlock(obj);
+		i915_gem_object_put(obj);
+
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
 static int igt_lmem_create_cleared_cpu(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -932,7 +988,7 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
 	struct drm_i915_gem_object *obj;
 	void *addr;
 
-	obj = i915_gem_object_create_region(mr, size, 0);
+	obj = i915_gem_object_create_region(mr, size, 0, 0);
 	if (IS_ERR(obj)) {
 		if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
 			return ERR_PTR(-ENODEV);
@@ -1149,6 +1205,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_lmem_create),
+		SUBTEST(igt_lmem_create_with_ps),
 		SUBTEST(igt_lmem_create_cleared_cpu),
 		SUBTEST(igt_lmem_write_cpu),
 		SUBTEST(igt_lmem_write_gpu),
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
index fa786dede608..efa86dffe3c6 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -63,6 +63,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = {
 static int mock_object_init(struct intel_memory_region *mem,
 			    struct drm_i915_gem_object *obj,
 			    resource_size_t size,
+			    resource_size_t page_size,
 			    unsigned int flags)
 {
 	static struct lock_class_key lock_class;
--
cgit v1.2.3
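[Editorial sketch] The patch above plumbs a per-region default page size into the buddy
manager: i915_ttm_buddy_man_alloc() now starts from bman->default_page_size and only lets
an explicit BO page_alignment override it, with both required to be at least chunk_size.
The following stand-alone, user-space C sketch mirrors that selection logic; it is not the
kernel code, and every name in it is illustrative only.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12ULL

/*
 * Pick the minimum allocation granularity the way the buddy manager is
 * described to after this patch: region-wide default unless the BO carries
 * its own page_alignment (given in pages, 0 means "unset").
 */
static uint64_t pick_min_page_size(uint64_t default_page_size,
				   uint64_t bo_page_alignment,
				   uint64_t chunk_size)
{
	uint64_t size = default_page_size;

	if (bo_page_alignment)
		size = bo_page_alignment << PAGE_SHIFT;

	/* Mirrors the GEM_BUG_ON(min_page_size < mm->chunk_size) check. */
	assert(size >= chunk_size);
	return size;
}

int main(void)
{
	/* 64K region default, no per-BO override -> 64K */
	printf("%llu\n", (unsigned long long)
	       pick_min_page_size(1ULL << 16, 0, 1ULL << 12));
	/* per-BO override of 512 pages -> 2M */
	printf("%llu\n", (unsigned long long)
	       pick_min_page_size(1ULL << 16, 512, 1ULL << 12));
	return 0;
}

The igt_lmem_create_with_ps selftest added above exercises exactly this path from the GEM
side, by forcing a page size at object creation and checking the resulting DMA alignment.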
From 7d6a276e2fa9579e0fd63931a6e8388e3171cecd Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Wed, 14 Jul 2021 14:34:17 -0500
Subject: drm/i915: Remove allow_alloc from i915_gem_object_get_sg*

This reverts the rest of 0edbb9ba1bfe ("drm/i915: Move cmd parser pinning
to execbuffer"). Now that the only user of i915_gem_object_get_sg without
allow_alloc has been removed, we can drop the parameter. This portion of
the revert was broken into its own patch to aid review.

Signed-off-by: Jason Ekstrand
Cc: Maarten Lankhorst
Reviewed-by: Jon Bloomfield
Acked-by: Daniel Vetter
Signed-off-by: Daniel Vetter
Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-4-jason@jlekstrand.net
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h | 10 +++++-----
 drivers/gpu/drm/i915/gem/i915_gem_pages.c  | 20 ++++----------------
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c    |  2 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c       |  2 +-
 4 files changed, 11 insertions(+), 23 deletions(-)

(limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 8be4fadeee48..f3ede43282dc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -342,22 +342,22 @@ struct scatterlist *
 __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
			 struct i915_gem_object_page_iter *iter,
			 unsigned int n,
-			 unsigned int *offset, bool allow_alloc, bool dma);
+			 unsigned int *offset, bool dma);
 
 static inline struct scatterlist *
 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
-		       unsigned int *offset, bool allow_alloc)
+		       unsigned int *offset)
 {
-	return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc, false);
+	return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, false);
 }
 
 static inline struct scatterlist *
 i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
			   unsigned int n,
-			   unsigned int *offset, bool allow_alloc)
+			   unsigned int *offset)
 {
-	return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc, true);
+	return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, true);
 }
 
 struct page *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 0c9d28423d45..8eb1c3a6fc9c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -494,7 +494,7 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
			 struct i915_gem_object_page_iter *iter,
			 unsigned int n,
			 unsigned int *offset,
-			 bool allow_alloc, bool dma)
+			 bool dma)
 {
	struct scatterlist *sg;
	unsigned int idx, count;
@@ -516,9 +516,6 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;
 
-	if (!allow_alloc)
-		goto manual_lookup;
-
	mutex_lock(&iter->lock);
 
	/* We prefer to reuse the last sg so that repeated lookup of this
@@ -568,16 +565,7 @@ scan:
	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;
 
-	goto manual_walk;
-
-manual_lookup:
-	idx = 0;
-	sg = obj->mm.pages->sgl;
-	count = __sg_page_count(sg);
-
-manual_walk:
-	/*
-	 * In case we failed to insert the entry into the radixtree, we need
+	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
@@ -624,7 +612,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
 
	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
-	sg = i915_gem_object_get_sg(obj, n, &offset, true);
+	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
 }
 
@@ -650,7 +638,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
	struct scatterlist *sg;
	unsigned int offset;
 
-	sg = i915_gem_object_get_sg_dma(obj, n, &offset, true);
+	sg = i915_gem_object_get_sg_dma(obj, n, &offset);
 
	if (len)
		*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 6589411396d3..f253b11e9e36 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -589,7 +589,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 
	GEM_WARN_ON(bo->ttm);
 
-	sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true, true);
+	sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true);
 
	return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20e46b843324..9d445ad9a342 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1494,7 +1494,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
	if (ret)
		goto err_sg_alloc;
 
-	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset, true);
+	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);
 
	sg = st->sgl;
--
cgit v1.2.3
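[Editorial sketch] The lookup path this patch simplifies keeps an iterator that remembers
the last scatterlist position visited ("We prefer to reuse the last sg so that repeated
lookup of this object is faster"), so nearby page lookups do not rescan the whole table.
The stand-alone C sketch below illustrates only that caching pattern; the struct and
function names are invented for the example and are not the driver's API.

#include <stddef.h>
#include <stdio.h>

struct seg {
	size_t npages;		/* pages covered by this segment */
};

struct seg_iter {
	const struct seg *segs;
	size_t nsegs;
	size_t cur;		/* cached segment index */
	size_t cur_base;	/* first page index covered by segs[cur] */
};

/* Find the segment containing page n and the page offset within it. */
static const struct seg *seg_lookup(struct seg_iter *it, size_t n, size_t *offset)
{
	/* Restart from the beginning if the cached position is past n. */
	if (n < it->cur_base) {
		it->cur = 0;
		it->cur_base = 0;
	}

	/* Otherwise walk forward from where the last lookup stopped. */
	while (it->cur_base + it->segs[it->cur].npages <= n) {
		it->cur_base += it->segs[it->cur].npages;
		it->cur++;
	}

	*offset = n - it->cur_base;
	return &it->segs[it->cur];
}

int main(void)
{
	const struct seg segs[] = { { 4 }, { 2 }, { 8 } };
	struct seg_iter it = { segs, 3, 0, 0 };
	size_t off;

	seg_lookup(&it, 5, &off);	/* segment 1, offset 1 */
	printf("seg %zu off %zu\n", it.cur, off);
	seg_lookup(&it, 6, &off);	/* reuses the cached position: segment 2, offset 0 */
	printf("seg %zu off %zu\n", it.cur, off);
	return 0;
}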
From 75e382850b7ea516cbeaecf2dd22dd040e144ad9 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 23 Jul 2021 12:21:39 -0500
Subject: drm/i915/gem/ttm: Only call __i915_gem_object_set_pages if needed

__i915_ttm_get_pages does two things. First, it calls ttm_bo_validate()
to check the given placement and migrate the BO if needed. Then, it
updates the GEM object to match, in case the object was migrated. If no
migration occurred, however, we might still have pages on the GEM
object, in which case we don't need to fetch them from TTM and call
__i915_gem_object_set_pages.

This hasn't been a problem before because the primary user of
__i915_ttm_get_pages is __i915_gem_object_get_pages, which only calls it
if the GEM object doesn't have pages. However, i915_ttm_migrate also
uses __i915_ttm_get_pages to do the migration, so it was unsafe to call
on an already populated object.

This patch checks i915_gem_object_has_pages() before trying to
__i915_gem_object_set_pages, so i915_ttm_migrate is safe to call, even
on populated objects.

Signed-off-by: Jason Ekstrand
Reviewed-by: Matthew Auld
Signed-off-by: Matthew Auld
Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-6-jason@jlekstrand.net
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index f253b11e9e36..771eb2963123 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -662,13 +662,14 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
		i915_ttm_adjust_gem_after_move(obj);
	}
 
-	GEM_WARN_ON(obj->mm.pages);
-	/* Object either has a page vector or is an iomem object */
-	st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
-	if (IS_ERR(st))
-		return PTR_ERR(st);
+	if (!i915_gem_object_has_pages(obj)) {
+		/* Object either has a page vector or is an iomem object */
+		st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
+		if (IS_ERR(st))
+			return PTR_ERR(st);
 
-	__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+	}
 
	return ret;
 }
--
cgit v1.2.3
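[Editorial sketch] The guard added in the last patch makes the get-pages path idempotent:
validate the placement first, then publish a fresh page list only when the object does not
already have one. The small stand-alone C sketch below shows that pattern in isolation; it
is not the driver code, and every name in it is invented for illustration.

#include <stdbool.h>
#include <stdio.h>

struct fake_obj {
	bool has_pages;
	int page_generation;
};

/* Placeholder for ttm_bo_validate(): may or may not move the object. */
static int validate_placement(struct fake_obj *obj)
{
	(void)obj;
	return 0;
}

static int get_pages(struct fake_obj *obj)
{
	int ret = validate_placement(obj);

	if (ret)
		return ret;

	/* Mirrors the i915_gem_object_has_pages() check from the patch. */
	if (!obj->has_pages) {
		obj->page_generation++;	/* stands in for publishing a new page list */
		obj->has_pages = true;
	}

	return 0;
}

int main(void)
{
	struct fake_obj obj = { false, 0 };

	get_pages(&obj);
	get_pages(&obj);	/* second call leaves the existing page list alone */
	printf("generation = %d\n", obj.page_generation);	/* prints 1 */
	return 0;
}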