diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 204 |
1 files changed, 143 insertions, 61 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 498ef8a7bbc7..2d1cb10d846f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,7 +38,8 @@ #include <linux/dma-buf.h> static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force); static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -62,6 +63,20 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +static bool cpu_cache_is_coherent(struct drm_device *dev, + enum i915_cache_level level) +{ + return HAS_LLC(dev) || level != I915_CACHE_NONE; +} + +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return true; + + return obj->pin_display; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -414,8 +429,7 @@ i915_gem_shmem_pread(struct drm_device *dev, * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush = 1; + needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) @@ -731,19 +745,18 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush_after = 1; + needs_clflush_after = cpu_write_needs_clflush(obj); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) return ret; } } - /* Same trick applies for invalidate partially written cachelines before - * writing. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) - && obj->cache_level == I915_CACHE_NONE) - needs_clflush_before = 1; + /* Same trick applies to invalidate partially written cachelines read + * before writing. */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) + needs_clflush_before = + !cpu_cache_is_coherent(dev, obj->cache_level); ret = i915_gem_object_get_pages(obj); if (ret) @@ -822,8 +835,8 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj); - i915_gem_chipset_flush(dev); + if (i915_gem_clflush_object(obj, obj->pin_display)) + i915_gem_chipset_flush(dev); } } @@ -900,9 +913,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == I915_CACHE_NONE && - obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + if (obj->tiling_mode == I915_TILING_NONE && + obj->base.write_domain != I915_GEM_DOMAIN_CPU && + cpu_write_needs_clflush(obj)) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between @@ -986,6 +999,8 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, bool wait_forever = true; int ret; + WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n"); + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) return 0; @@ -1251,8 +1266,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, } /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_count) - i915_gem_object_flush_cpu_write_domain(obj); + if (obj->pin_display) + i915_gem_object_flush_cpu_write_domain(obj, true); drm_gem_object_unreference(&obj->base); unlock: @@ -1622,7 +1637,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) * hope for the best. */ WARN_ON(ret != -EIO); - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, true); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -2188,7 +2203,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, offset = i915_gem_obj_offset(request->batch_obj, request_to_vm(request)); - if (ring->hangcheck.action != wait && + if (ring->hangcheck.action != HANGCHECK_WAIT && i915_request_guilty(request, acthd, &inside)) { DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n", ring->name, @@ -2593,6 +2608,9 @@ int i915_vma_unbind(struct i915_vma *vma) if (list_empty(&vma->vma_link)) return 0; + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + if (obj->pin_count) return -EBUSY; @@ -2630,6 +2648,8 @@ int i915_vma_unbind(struct i915_vma *vma) obj->map_and_fenceable = true; drm_mm_remove_node(&vma->node); + +destroy: i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if @@ -3088,15 +3108,11 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; - bool mappable, fenceable; size_t gtt_max = map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; - if (WARN_ON(!list_empty(&obj->vma_list))) - return -EBUSY; - fence_size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode); @@ -3135,16 +3151,17 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); - /* FIXME: For now we only ever use 1 VMA per object */ BUG_ON(!i915_is_ggtt(vm)); - WARN_ON(!list_empty(&obj->vma_list)); - vma = i915_gem_vma_create(obj, vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unpin; } + /* For now we only ever use 1 vma per object */ + WARN_ON(!list_is_singular(&obj->vma_list)); + search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, @@ -3173,18 +3190,19 @@ search_free: list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); list_add_tail(&vma->mm_list, &vm->inactive_list); - fenceable = - i915_is_ggtt(vm) && - i915_gem_obj_ggtt_size(obj) == fence_size && - (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0; + if (i915_is_ggtt(vm)) { + bool mappable, fenceable; - mappable = - i915_is_ggtt(vm) && - vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end; + fenceable = (vma->node.size == fence_size && + (vma->node.start & (fence_alignment - 1)) == 0); + + mappable = (vma->node.start + obj->base.size <= + dev_priv->gtt.mappable_end); - /* Map and fenceable only changes if the VM is the global GGTT */ - if (i915_is_ggtt(vm)) obj->map_and_fenceable = mappable && fenceable; + } + + WARN_ON(map_and_fenceable && !obj->map_and_fenceable); trace_i915_vma_bind(vma, map_and_fenceable); i915_gem_verify_gtt(dev); @@ -3199,22 +3217,23 @@ err_unpin: return ret; } -void -i915_gem_clflush_object(struct drm_i915_gem_object *obj) +bool +i915_gem_clflush_object(struct drm_i915_gem_object *obj, + bool force) { /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ if (obj->pages == NULL) - return; + return false; /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. */ if (obj->stolen) - return; + return false; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -3224,12 +3243,13 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) * snooping behaviour occurs naturally as the result of our domain * tracking. */ - if (obj->cache_level != I915_CACHE_NONE) - return; + if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return false; trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->pages); + + return true; } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3261,15 +3281,17 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) /** Flushes the CPU write domain for the object if it's dirty. */ static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) +i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force) { uint32_t old_write_domain; if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj); - i915_gem_chipset_flush(obj->base.dev); + if (i915_gem_clflush_object(obj, force)) + i915_gem_chipset_flush(obj->base.dev); + old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3302,7 +3324,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, false); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3392,7 +3414,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, obj, cache_level); } - if (cache_level == I915_CACHE_NONE) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + vma->node.color = cache_level; + obj->cache_level = cache_level; + + if (cpu_write_needs_clflush(obj)) { u32 old_read_domains, old_write_domain; /* If we're coming from LLC cached, then we haven't @@ -3402,7 +3428,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * Just set it to the CPU cache for now. */ WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); - WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; @@ -3415,9 +3440,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, old_write_domain); } - list_for_each_entry(vma, &obj->vma_list, vma_link) - vma->node.color = cache_level; - obj->cache_level = cache_level; i915_gem_verify_gtt(dev); return 0; } @@ -3439,7 +3461,20 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, goto unlock; } - args->caching = obj->cache_level != I915_CACHE_NONE; + switch (obj->cache_level) { + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + args->caching = I915_CACHING_CACHED; + break; + + case I915_CACHE_WT: + args->caching = I915_CACHING_DISPLAY; + break; + + default: + args->caching = I915_CACHING_NONE; + break; + } drm_gem_object_unreference(&obj->base); unlock: @@ -3462,6 +3497,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, case I915_CACHING_CACHED: level = I915_CACHE_LLC; break; + case I915_CACHING_DISPLAY: + level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; + break; default: return -EINVAL; } @@ -3484,6 +3522,22 @@ unlock: return ret; } +static bool is_pin_display(struct drm_i915_gem_object *obj) +{ + /* There are 3 sources that pin objects: + * 1. The display engine (scanouts, sprites, cursors); + * 2. Reservations for execbuffer; + * 3. The user. + * + * We can ignore reservations as we hold the struct_mutex and + * are only called outside of the reservation path. The user + * can only increment pin_count once, and so if after + * subtracting the potential reference by the user, any pin_count + * remains, it must be due to another use by the display engine. + */ + return obj->pin_count - !!obj->user_pin_count; +} + /* * Prepare buffer for display plane (scanout, cursors, etc). * Can be called from an uninterruptible phase (modesetting) and allows @@ -3503,6 +3557,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return ret; } + /* Mark the pin_display early so that we account for the + * display coherency whilst setting up the cache domains. + */ + obj->pin_display = true; + /* The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all @@ -3512,9 +3571,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * of uncaching, which would allow us to flush all the LLC-cached data * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + ret = i915_gem_object_set_cache_level(obj, + HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); if (ret) - return ret; + goto err_unpin_display; /* As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we @@ -3522,9 +3582,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); if (ret) - return ret; + goto err_unpin_display; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, true); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; @@ -3540,6 +3600,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, old_write_domain); return 0; + +err_unpin_display: + obj->pin_display = is_pin_display(obj); + return ret; +} + +void +i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin(obj); + obj->pin_display = is_pin_display(obj); } int @@ -3585,7 +3656,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3767,10 +3838,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, obj->user_pin_count++; obj->pin_filp = file; - /* XXX - flush the CPU caches for pinned objects - * as the X server doesn't manage domains yet - */ - i915_gem_object_flush_cpu_write_domain(obj); args->offset = i915_gem_obj_ggtt_offset(obj); out: drm_gem_object_unreference(&obj->base); @@ -3913,6 +3980,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->exec_list); + INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); obj->ops = ops; @@ -4052,6 +4120,7 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&vma->vma_link); INIT_LIST_HEAD(&vma->mm_list); + INIT_LIST_HEAD(&vma->exec_list); vma->vm = vm; vma->obj = obj; @@ -4801,3 +4870,16 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, return NULL; } + +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = i915_gem_vma_create(obj, vm); + + return vma; +} |