summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c204
1 files changed, 143 insertions, 61 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 498ef8a7bbc7..2d1cb10d846f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,7 +38,8 @@
#include <linux/dma-buf.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool force);
static __must_check int
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -62,6 +63,20 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+static bool cpu_cache_is_coherent(struct drm_device *dev,
+ enum i915_cache_level level)
+{
+ return HAS_LLC(dev) || level != I915_CACHE_NONE;
+}
+
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ return true;
+
+ return obj->pin_display;
+}
+
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (obj->tiling_mode)
@@ -414,8 +429,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
* read domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will dirty the data
* anyway again before the next pread happens. */
- if (obj->cache_level == I915_CACHE_NONE)
- needs_clflush = 1;
+ needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);
if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, false);
if (ret)
@@ -731,19 +745,18 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
* write domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway. */
- if (obj->cache_level == I915_CACHE_NONE)
- needs_clflush_after = 1;
+ needs_clflush_after = cpu_write_needs_clflush(obj);
if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
return ret;
}
}
- /* Same trick applies for invalidate partially written cachelines before
- * writing. */
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
- && obj->cache_level == I915_CACHE_NONE)
- needs_clflush_before = 1;
+ /* Same trick applies to invalidate partially written cachelines read
+ * before writing. */
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
+ needs_clflush_before =
+ !cpu_cache_is_coherent(dev, obj->cache_level);
ret = i915_gem_object_get_pages(obj);
if (ret)
@@ -822,8 +835,8 @@ out:
*/
if (!needs_clflush_after &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- i915_gem_clflush_object(obj);
- i915_gem_chipset_flush(dev);
+ if (i915_gem_clflush_object(obj, obj->pin_display))
+ i915_gem_chipset_flush(dev);
}
}
@@ -900,9 +913,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- if (obj->cache_level == I915_CACHE_NONE &&
- obj->tiling_mode == I915_TILING_NONE &&
- obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+ if (obj->tiling_mode == I915_TILING_NONE &&
+ obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+ cpu_write_needs_clflush(obj)) {
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
@@ -986,6 +999,8 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
bool wait_forever = true;
int ret;
+ WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");
+
if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
return 0;
@@ -1251,8 +1266,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
}
/* Pinned buffers may be scanout, so flush the cache */
- if (obj->pin_count)
- i915_gem_object_flush_cpu_write_domain(obj);
+ if (obj->pin_display)
+ i915_gem_object_flush_cpu_write_domain(obj, true);
drm_gem_object_unreference(&obj->base);
unlock:
@@ -1622,7 +1637,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
* hope for the best.
*/
WARN_ON(ret != -EIO);
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, true);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@@ -2188,7 +2203,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
offset = i915_gem_obj_offset(request->batch_obj,
request_to_vm(request));
- if (ring->hangcheck.action != wait &&
+ if (ring->hangcheck.action != HANGCHECK_WAIT &&
i915_request_guilty(request, acthd, &inside)) {
DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
ring->name,
@@ -2593,6 +2608,9 @@ int i915_vma_unbind(struct i915_vma *vma)
if (list_empty(&vma->vma_link))
return 0;
+ if (!drm_mm_node_allocated(&vma->node))
+ goto destroy;
+
if (obj->pin_count)
return -EBUSY;
@@ -2630,6 +2648,8 @@ int i915_vma_unbind(struct i915_vma *vma)
obj->map_and_fenceable = true;
drm_mm_remove_node(&vma->node);
+
+destroy:
i915_gem_vma_destroy(vma);
/* Since the unbound list is global, only move to that list if
@@ -3088,15 +3108,11 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
drm_i915_private_t *dev_priv = dev->dev_private;
u32 size, fence_size, fence_alignment, unfenced_alignment;
- bool mappable, fenceable;
size_t gtt_max =
map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
struct i915_vma *vma;
int ret;
- if (WARN_ON(!list_empty(&obj->vma_list)))
- return -EBUSY;
-
fence_size = i915_gem_get_gtt_size(dev,
obj->base.size,
obj->tiling_mode);
@@ -3135,16 +3151,17 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
i915_gem_object_pin_pages(obj);
- /* FIXME: For now we only ever use 1 VMA per object */
BUG_ON(!i915_is_ggtt(vm));
- WARN_ON(!list_empty(&obj->vma_list));
- vma = i915_gem_vma_create(obj, vm);
+ vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unpin;
}
+ /* For now we only ever use 1 vma per object */
+ WARN_ON(!list_is_singular(&obj->vma_list));
+
search_free:
ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
size, alignment,
@@ -3173,18 +3190,19 @@ search_free:
list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
list_add_tail(&vma->mm_list, &vm->inactive_list);
- fenceable =
- i915_is_ggtt(vm) &&
- i915_gem_obj_ggtt_size(obj) == fence_size &&
- (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0;
+ if (i915_is_ggtt(vm)) {
+ bool mappable, fenceable;
- mappable =
- i915_is_ggtt(vm) &&
- vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end;
+ fenceable = (vma->node.size == fence_size &&
+ (vma->node.start & (fence_alignment - 1)) == 0);
+
+ mappable = (vma->node.start + obj->base.size <=
+ dev_priv->gtt.mappable_end);
- /* Map and fenceable only changes if the VM is the global GGTT */
- if (i915_is_ggtt(vm))
obj->map_and_fenceable = mappable && fenceable;
+ }
+
+ WARN_ON(map_and_fenceable && !obj->map_and_fenceable);
trace_i915_vma_bind(vma, map_and_fenceable);
i915_gem_verify_gtt(dev);
@@ -3199,22 +3217,23 @@ err_unpin:
return ret;
}
-void
-i915_gem_clflush_object(struct drm_i915_gem_object *obj)
+bool
+i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+ bool force)
{
/* If we don't have a page list set up, then we're not pinned
* to GPU, and we can ignore the cache flush because it'll happen
* again at bind time.
*/
if (obj->pages == NULL)
- return;
+ return false;
/*
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
*/
if (obj->stolen)
- return;
+ return false;
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
@@ -3224,12 +3243,13 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (obj->cache_level != I915_CACHE_NONE)
- return;
+ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ return false;
trace_i915_gem_object_clflush(obj);
-
drm_clflush_sg(obj->pages);
+
+ return true;
}
/** Flushes the GTT write domain for the object if it's dirty. */
@@ -3261,15 +3281,17 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
/** Flushes the CPU write domain for the object if it's dirty. */
static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool force)
{
uint32_t old_write_domain;
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
- i915_gem_clflush_object(obj);
- i915_gem_chipset_flush(obj->base.dev);
+ if (i915_gem_clflush_object(obj, force))
+ i915_gem_chipset_flush(obj->base.dev);
+
old_write_domain = obj->base.write_domain;
obj->base.write_domain = 0;
@@ -3302,7 +3324,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, false);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
@@ -3392,7 +3414,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
obj, cache_level);
}
- if (cache_level == I915_CACHE_NONE) {
+ list_for_each_entry(vma, &obj->vma_list, vma_link)
+ vma->node.color = cache_level;
+ obj->cache_level = cache_level;
+
+ if (cpu_write_needs_clflush(obj)) {
u32 old_read_domains, old_write_domain;
/* If we're coming from LLC cached, then we haven't
@@ -3402,7 +3428,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
* Just set it to the CPU cache for now.
*/
WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
- WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
old_read_domains = obj->base.read_domains;
old_write_domain = obj->base.write_domain;
@@ -3415,9 +3440,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
old_write_domain);
}
- list_for_each_entry(vma, &obj->vma_list, vma_link)
- vma->node.color = cache_level;
- obj->cache_level = cache_level;
i915_gem_verify_gtt(dev);
return 0;
}
@@ -3439,7 +3461,20 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
goto unlock;
}
- args->caching = obj->cache_level != I915_CACHE_NONE;
+ switch (obj->cache_level) {
+ case I915_CACHE_LLC:
+ case I915_CACHE_L3_LLC:
+ args->caching = I915_CACHING_CACHED;
+ break;
+
+ case I915_CACHE_WT:
+ args->caching = I915_CACHING_DISPLAY;
+ break;
+
+ default:
+ args->caching = I915_CACHING_NONE;
+ break;
+ }
drm_gem_object_unreference(&obj->base);
unlock:
@@ -3462,6 +3497,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
case I915_CACHING_CACHED:
level = I915_CACHE_LLC;
break;
+ case I915_CACHING_DISPLAY:
+ level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
+ break;
default:
return -EINVAL;
}
@@ -3484,6 +3522,22 @@ unlock:
return ret;
}
+static bool is_pin_display(struct drm_i915_gem_object *obj)
+{
+ /* There are 3 sources that pin objects:
+ * 1. The display engine (scanouts, sprites, cursors);
+ * 2. Reservations for execbuffer;
+ * 3. The user.
+ *
+ * We can ignore reservations as we hold the struct_mutex and
+ * are only called outside of the reservation path. The user
+ * can only increment pin_count once, and so if after
+ * subtracting the potential reference by the user, any pin_count
+ * remains, it must be due to another use by the display engine.
+ */
+ return obj->pin_count - !!obj->user_pin_count;
+}
+
/*
* Prepare buffer for display plane (scanout, cursors, etc).
* Can be called from an uninterruptible phase (modesetting) and allows
@@ -3503,6 +3557,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
return ret;
}
+ /* Mark the pin_display early so that we account for the
+ * display coherency whilst setting up the cache domains.
+ */
+ obj->pin_display = true;
+
/* The display engine is not coherent with the LLC cache on gen6. As
* a result, we make sure that the pinning that is about to occur is
* done with uncached PTEs. This is lowest common denominator for all
@@ -3512,9 +3571,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
* of uncaching, which would allow us to flush all the LLC-cached data
* with that bit in the PTE to main memory with just one PIPE_CONTROL.
*/
- ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+ ret = i915_gem_object_set_cache_level(obj,
+ HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
if (ret)
- return ret;
+ goto err_unpin_display;
/* As the user may map the buffer once pinned in the display plane
* (e.g. libkms for the bootup splash), we have to ensure that we
@@ -3522,9 +3582,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
*/
ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false);
if (ret)
- return ret;
+ goto err_unpin_display;
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, true);
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
@@ -3540,6 +3600,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
old_write_domain);
return 0;
+
+err_unpin_display:
+ obj->pin_display = is_pin_display(obj);
+ return ret;
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_unpin(obj);
+ obj->pin_display = is_pin_display(obj);
}
int
@@ -3585,7 +3656,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, false);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
}
@@ -3767,10 +3838,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
obj->user_pin_count++;
obj->pin_filp = file;
- /* XXX - flush the CPU caches for pinned objects
- * as the X server doesn't manage domains yet
- */
- i915_gem_object_flush_cpu_write_domain(obj);
args->offset = i915_gem_obj_ggtt_offset(obj);
out:
drm_gem_object_unreference(&obj->base);
@@ -3913,6 +3980,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->global_list);
INIT_LIST_HEAD(&obj->ring_list);
INIT_LIST_HEAD(&obj->exec_list);
+ INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
obj->ops = ops;
@@ -4052,6 +4120,7 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&vma->vma_link);
INIT_LIST_HEAD(&vma->mm_list);
+ INIT_LIST_HEAD(&vma->exec_list);
vma->vm = vm;
vma->obj = obj;
@@ -4801,3 +4870,16 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
return NULL;
}
+
+struct i915_vma *
+i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm)
+{
+ struct i915_vma *vma;
+
+ vma = i915_gem_obj_to_vma(obj, vm);
+ if (!vma)
+ vma = i915_gem_vma_create(obj, vm);
+
+ return vma;
+}