summaryrefslogtreecommitdiff
path: root/lib/intel_batchbuffer.c
diff options
context:
space:
mode:
authorZbigniew Kempczyński <zbigniew.kempczynski@intel.com>2020-05-20 13:26:37 +0200
committerChris Wilson <chris@chris-wilson.co.uk>2020-05-20 18:59:34 +0100
commit55b8a22bbd3f08ca5450ce9130b5f35a2d721301 (patch)
treed48fe088d90e3a4050ecab758c9c239b851291bc /lib/intel_batchbuffer.c
parentff4dd1a526815d871e916557d3af986067d1cf76 (diff)
lib/intel_batchbuffer: Introduce intel_bb
Simple batchbuffer facility which gathers and outputs relocations. v2: make bb api more consistent and universal v3: fix compiling issues on non-x86 arch v4: add indexing tree and marking object as render target v5: randomizing addresses to avoid relocations v6: fix invalid reallocation size (Chris) Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'lib/intel_batchbuffer.c')
-rw-r--r--lib/intel_batchbuffer.c477
1 file changed, 477 insertions, 0 deletions
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index f1a45b47..effcff4f 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include <string.h>
#include <assert.h>
+#include <search.h>
#include "drm.h"
#include "drmtest.h"
@@ -41,9 +42,11 @@
#include "rendercopy.h"
#include "media_fill.h"
#include "ioctl_wrappers.h"
+#include "i915/gem_mman.h"
#include "media_spin.h"
#include "gpgpu_fill.h"
#include "igt_aux.h"
+#include "igt_rand.h"
#include "i830_reg.h"
#include <i915_drm.h>
@@ -1171,3 +1174,477 @@ igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
return spin;
}
+
+/* Intel batchbuffer v2 */
+static bool intel_bb_debug_tree = false;
+
+/*
+ * __reallocate_objects:
+ * @ibb: pointer to intel_bb
+ *
+ * Increases number of objects if necessary.
+ */
+static void __reallocate_objects(struct intel_bb *ibb)
+{
+ uint32_t num;
+
+ if (ibb->num_objects == ibb->allocated_objects) {
+ num = 4096 / sizeof(*ibb->objects);
+ ibb->objects = realloc(ibb->objects,
+ sizeof(*ibb->objects) *
+ (num + ibb->allocated_objects));
+ igt_assert(ibb->objects);
+ ibb->allocated_objects += num;
+
+ memset(&ibb->objects[ibb->num_objects], 0,
+ num * sizeof(*ibb->objects));
+ }
+}
+
+/**
+ * __intel_bb_create:
+ * @i915: drm fd
+ * @size: size of the batchbuffer
+ *
+ * Returns:
+ *
+ * Pointer the intel_bb, asserts on failure.
+ */
+struct intel_bb *intel_bb_create(int i915, uint32_t size)
+{
+ struct intel_bb *ibb = calloc(1, sizeof(*ibb));
+ uint64_t gtt_size;
+
+ igt_assert(ibb);
+
+ ibb->i915 = i915;
+ ibb->devid = intel_get_drm_devid(i915);
+ ibb->gen = intel_gen(ibb->devid);
+ ibb->handle = gem_create(i915, size);
+ ibb->size = size;
+ ibb->batch = calloc(1, size);
+ igt_assert(ibb->batch);
+ ibb->ptr = ibb->batch;
+ ibb->prng = (uint32_t) to_user_pointer(ibb);
+
+ gtt_size = gem_aperture_size(i915);
+ if (!gem_uses_full_ppgtt(i915))
+ gtt_size /= 2;
+ if ((gtt_size - 1) >> 32)
+ ibb->supports_48b_address = true;
+ ibb->gtt_size = gtt_size;
+
+ __reallocate_objects(ibb);
+ intel_bb_add_object(ibb, ibb->handle, 0, false);
+
+ return ibb;
+}
+
/*
 * tdestroy() insists on calling a destructor for every node, but our
 * nodes point straight into the objects array which is freed wholesale,
 * so the per-node destructor has nothing to do.
 */
static void __do_nothing(void *node)
{
	(void) node;
}
+
+/**
+ * intel_bb_destroy:
+ * @ibb: pointer to intel_bb
+ *
+ * Frees all relocations / objects allocated during filling the batch.
+ */
+void intel_bb_destroy(struct intel_bb *ibb)
+{
+ uint32_t i;
+
+ igt_assert(ibb);
+
+ /* Free relocations */
+ for (i = 0; i < ibb->num_objects; i++)
+ free(from_user_pointer(ibb->objects[i].relocs_ptr));
+
+ free(ibb->objects);
+ tdestroy(ibb->root, __do_nothing);
+
+ munmap(ibb->batch, ibb->size);
+ gem_close(ibb->i915, ibb->handle);
+
+ free(ibb);
+}
+
+/**
+ * intel_bb_set_debug:
+ * @ibb: pointer to intel_bb
+ * @debug: true / false
+ *
+ * Sets debug to true / false. Execbuf is then called synchronously and
+ * object/reloc arrays are printed after execution.
+ */
+void intel_bb_set_debug(struct intel_bb *ibb, bool debug)
+{
+ ibb->debug = debug;
+}
+
+static int __compare_objects(const void *p1, const void *p2)
+{
+ const struct drm_i915_gem_exec_object2 *o1 = p1, *o2 = p2;
+
+ return (int) ((int64_t) o1->handle - (int64_t) o2->handle);
+}
+
/**
 * intel_bb_add_object:
 * @ibb: pointer to intel_bb
 * @handle: which handle to add to objects array
 * @offset: presumed offset of the object when I915_EXEC_NO_RELOC flag is
 * used in execbuf call
 * @write: is this handle a render target (GPU writes to it)
 *
 * Function adds or updates execobj slot in bb objects array and
 * in the object tree. When object is a render target it has to
 * be marked with EXEC_OBJECT_WRITE flag.
 *
 * Returns the execobj slot for @handle (new or pre-existing).
 */
struct drm_i915_gem_exec_object2 *
intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
		    uint64_t offset, bool write)
{
	struct drm_i915_gem_exec_object2 *object;
	struct drm_i915_gem_exec_object2 **found;
	uint32_t i;

	/*
	 * NOTE(review): growing the array may realloc() it to a new
	 * address, yet the tree below stores raw pointers into the array -
	 * pointers already in the tree would then dangle. TODO: confirm
	 * the array never moves in practice or rebuild the tree on growth.
	 */
	__reallocate_objects(ibb);

	/* Tentatively fill the next free slot with the handle... */
	i = ibb->num_objects;
	object = &ibb->objects[i];
	object->handle = handle;

	/* ...and commit it only if the handle was not already in the tree. */
	found = tsearch((void *) object, &ibb->root, __compare_objects);

	if (*found == object)
		ibb->num_objects++;
	else
		object = *found;

	/* Assign address once */
	if (object->offset == 0) {
		if (offset) {
			object->offset = offset;
		} else {
			/* randomize the address, we try to avoid relocations */
			offset = hars_petruska_f54_1_random64(&ibb->prng);
			offset &= (ibb->gtt_size - 1);
			offset &= ~(4096 - 1);
			object->offset = offset;
		}
	}

	/* Flags only accumulate; a later add can upgrade read -> write. */
	if (write)
		object->flags |= EXEC_OBJECT_WRITE;

	if (ibb->supports_48b_address)
		object->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;

	return object;
}
+
/*
 * intel_bb_add_reloc:
 * @ibb: pointer to intel_bb
 * @handle: object handle which address will be taken to patch the bb
 * @read_domains: gem domain bits for the relocation
 * @write_domain: gem domain bit for the relocation
 * @delta: delta value to add to @buffer's gpu address
 * @offset: offset within bb to be patched
 * @presumed_offset: address of the object in address space, important for
 * I915_EXEC_NO_RELOC flag
 *
 * Function allocates additional relocation slot in reloc array for a handle.
 * It also implicitly adds handle in the objects array if object doesn't
 * exists but doesn't mark it as a render target.
 *
 * Returns the object's tracked gpu offset, which callers emit into the
 * batch as the presumed address.
 */
static uint64_t intel_bb_add_reloc(struct intel_bb *ibb,
				   uint32_t handle,
				   uint32_t read_domains,
				   uint32_t write_domain,
				   uint64_t delta,
				   uint64_t offset,
				   uint64_t presumed_offset)
{
	struct drm_i915_gem_relocation_entry *relocs;
	struct drm_i915_gem_exec_object2 *object;
	uint32_t i;

	/* Make sure the target has an execobj slot (not a render target). */
	object = intel_bb_add_object(ibb, handle, presumed_offset, false);

	/* Grow the reloc array in page-sized chunks when full. */
	relocs = ibb->relocs;
	if (ibb->num_relocs == ibb->allocated_relocs) {
		ibb->allocated_relocs += 4096 / sizeof(*relocs);
		relocs = realloc(relocs, sizeof(*relocs) * ibb->allocated_relocs);
		igt_assert(relocs);
		ibb->relocs = relocs;
	}

	i = ibb->num_relocs++;
	memset(&relocs[i], 0, sizeof(*relocs));
	relocs[i].target_handle = handle;
	relocs[i].read_domains = read_domains;
	relocs[i].write_domain = write_domain;
	relocs[i].delta = delta;
	relocs[i].offset = offset;
	/* Record the tracked offset: for an already-known object the
	 * previously assigned address wins over @presumed_offset. */
	relocs[i].presumed_offset = object->offset;

	/* NOTE(review): this prints the caller's @presumed_offset, which
	 * may differ from the object->offset actually recorded above. */
	igt_debug("add reloc: handle: %u, r/w: 0x%x/0x%x, "
		  "delta: 0x%" PRIx64 ", "
		  "offset: 0x%" PRIx64 ", "
		  "poffset: 0x%" PRIx64 "\n",
		  handle, read_domains, write_domain,
		  delta, offset, presumed_offset);

	return object->offset;
}
+
/**
 * intel_bb_emit_reloc:
 * @ibb: pointer to intel_bb
 * @handle: object handle which address will be taken to patch the bb
 * @read_domains: gem domain bits for the relocation
 * @write_domain: gem domain bit for the relocation
 * @delta: delta value to add to @buffer's gpu address
 * @presumed_offset: address of the object in address space, important for
 * I915_EXEC_NO_RELOC flag
 *
 * Function prepares relocation (execobj if required + reloc) and emits
 * offset in bb. For I915_EXEC_NO_RELOC presumed_offset is a hint we already
 * have object in valid place and relocation step can be skipped in this case.
 *
 * Note: delta is value added to address, mostly used when some instructions
 * require modify-bit set to apply change. Which delta is valid depends
 * on instruction (see instruction specification).
 *
 * Returns the object's presumed gpu address (without @delta applied).
 */
uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
			     uint32_t handle,
			     uint32_t read_domains,
			     uint32_t write_domain,
			     uint64_t delta,
			     uint64_t presumed_offset)
{
	uint64_t address;

	igt_assert(ibb);

	/* Reloc is patched at the current bb write position. */
	address = intel_bb_add_reloc(ibb, handle, read_domains, write_domain,
				     delta, intel_bb_offset(ibb),
				     presumed_offset);

	intel_bb_out(ibb, delta + address);
	if (ibb->gen >= 8)
		/* NOTE(review): high dword ignores @delta - if delta+address
		 * carries over the 4GiB boundary the upper half is stale.
		 * Harmless for small deltas on 4K-aligned offsets; confirm. */
		intel_bb_out(ibb, address >> 32);

	return address;
}
+
/**
 * intel_bb_offset_reloc:
 * @ibb: pointer to intel_bb
 * @handle: object handle which address will be taken to patch the bb
 * @read_domains: gem domain bits for the relocation
 * @write_domain: gem domain bit for the relocation
 * @offset: offset within bb to be patched
 * @presumed_offset: address of the object in address space, important for
 * I915_EXEC_NO_RELOC flag
 *
 * Function prepares relocation (execobj if required + reloc). It is used
 * when the batchbuffer is edited by modifying structures rather than by
 * emitting dwords, so some structure fields have to point at relocated
 * addresses. The caller passes @offset - the position in the bb where
 * the relocation will be applied - and gets back the object's presumed
 * gpu address.
 */
uint64_t intel_bb_offset_reloc(struct intel_bb *ibb,
			       uint32_t handle,
			       uint32_t read_domains,
			       uint32_t write_domain,
			       uint32_t offset,
			       uint64_t presumed_offset)
{
	uint64_t address;

	igt_assert(ibb);

	/* No delta for structure-edited relocations. */
	address = intel_bb_add_reloc(ibb, handle, read_domains, write_domain,
				     0, offset, presumed_offset);

	return address;
}
+
+static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ struct drm_i915_gem_exec_object2 *objects;
+ struct drm_i915_gem_relocation_entry *relocs, *reloc;
+ int i, j;
+
+ igt_info("execbuf batch len: %u, start offset: 0x%x, "
+ "DR1: 0x%x, DR4: 0x%x, "
+ "num clip: %u, clipptr: 0x%llx, "
+ "flags: 0x%llx, rsvd1: 0x%llx, rsvd2: 0x%llx\n",
+ execbuf->batch_len, execbuf->batch_start_offset,
+ execbuf->DR1, execbuf->DR4,
+ execbuf->num_cliprects, execbuf->cliprects_ptr,
+ execbuf->flags, execbuf->rsvd1, execbuf->rsvd2);
+
+ igt_info("execbuf buffer_count: %d\n", execbuf->buffer_count);
+ for (i = 0; i < execbuf->buffer_count; i++) {
+ objects = &((struct drm_i915_gem_exec_object2 *)
+ from_user_pointer(execbuf->buffers_ptr))[i];
+ relocs = from_user_pointer(objects->relocs_ptr);
+ igt_info(" [%d] handle: %u, reloc_count: %d, reloc_ptr: %p, "
+ "align: 0x%llx, offset: 0x%llx, flags: 0x%llx, "
+ "rsvd1: 0x%llx, rsvd2: 0x%llx\n",
+ i, objects->handle, objects->relocation_count,
+ relocs,
+ objects->alignment,
+ objects->offset, objects->flags,
+ objects->rsvd1, objects->rsvd2);
+ if (objects->relocation_count) {
+ igt_info("\texecbuf relocs:\n");
+ for (j = 0; j < objects->relocation_count; j++) {
+ reloc = &relocs[j];
+ igt_info("\t [%d] target handle: %u, "
+ "offset: 0x%llx, delta: 0x%x, "
+ "presumed_offset: 0x%llx, "
+ "read_domains: 0x%x, "
+ "write_domain: 0x%x\n",
+ j, reloc->target_handle,
+ reloc->offset, reloc->delta,
+ reloc->presumed_offset,
+ reloc->read_domains,
+ reloc->write_domain);
+ }
+ }
+ }
+}
+
+static void print_node(const void *node, VISIT which, int depth)
+{
+ const struct drm_i915_gem_exec_object2 *object =
+ *(const struct drm_i915_gem_exec_object2 **) node;
+ (void) depth;
+
+ switch (which) {
+ case preorder:
+ case endorder:
+ break;
+
+ case postorder:
+ case leaf:
+ igt_info("\t handle: %u, offset: 0x%" PRIx64 "\n",
+ object->handle, (uint64_t) object->offset);
+ break;
+ }
+}
+
/*
 * @__intel_bb_exec:
 * @ibb: pointer to intel_bb
 * @end_offset: offset of the last instruction in the bb
 * @flags: flags passed directly to execbuf
 * @ctx: context
 * @sync: if true wait for execbuf completion, otherwise caller is responsible
 * to wait for completion
 *
 * Returns: 0 on success, otherwise the value __gem_execbuf() returned
 * (presumably a negative errno - confirm against the wrapper).
 *
 * Note: In this step execobj for bb is allocated and inserted to the objects
 * array.
*/
int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
		    uint32_t ctx, uint64_t flags, bool sync)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	int ret;

	/* Slot 0 is the batch itself - see I915_EXEC_BATCH_FIRST below. */
	ibb->objects[0].relocs_ptr = to_user_pointer(ibb->relocs);
	ibb->objects[0].relocation_count = ibb->num_relocs;
	ibb->objects[0].handle = ibb->handle;

	/* Upload the CPU shadow copy; the full buffer is written, not
	 * just @end_offset bytes. */
	gem_write(ibb->i915, ibb->handle, 0, ibb->batch, ibb->size);

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t) ibb->objects;
	execbuf.buffer_count = ibb->num_objects;
	execbuf.batch_len = end_offset;
	execbuf.rsvd1 = ctx;
	execbuf.flags = flags | I915_EXEC_BATCH_FIRST;

	ret = __gem_execbuf(ibb->i915, &execbuf);
	if (ret)
		return ret;

	/* Debug implies sync so the dump reflects post-execution offsets. */
	if (sync || ibb->debug)
		gem_sync(ibb->i915, ibb->handle);

	if (ibb->debug) {
		intel_bb_dump_execbuf(&execbuf);
		if (intel_bb_debug_tree) {
			igt_info("\nTree:\n");
			twalk(ibb->root, print_node);
		}
	}

	return 0;
}
+
/**
 * intel_bb_exec:
 * @ibb: pointer to intel_bb
 * @end_offset: offset of the last instruction in the bb
 * @flags: flags passed directly to execbuf
 * @sync: if true wait for execbuf completion, otherwise caller is responsible
 * to wait for completion
 *
 * Thin wrapper around __intel_bb_exec() using the default context (0).
 * Asserts on any execbuf failure.
*/
void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
		   uint64_t flags, bool sync)
{
	igt_assert_eq(__intel_bb_exec(ibb, end_offset, 0, flags, sync), 0);
}
+
/**
 * intel_bb_exec_with_context:
 * @ibb: pointer to intel_bb
 * @end_offset: offset of the last instruction in the bb
 * @ctx: context
 * @flags: flags passed directly to execbuf
 * @sync: if true wait for execbuf completion, otherwise caller is responsible
 * to wait for completion
 *
 * Thin wrapper around __intel_bb_exec() using context @ctx.
 * Asserts on any execbuf failure.
*/
void intel_bb_exec_with_context(struct intel_bb *ibb, uint32_t end_offset,
				uint32_t ctx, uint64_t flags, bool sync)
{
	igt_assert_eq(__intel_bb_exec(ibb, end_offset, ctx, flags, sync), 0);
}
+
+/**
+ * intel_bb_get_object_address:
+ * @ibb: pointer to intel_bb
+ * @handle: object handle
+ *
+ * When objects addresses are previously pinned and we don't want to relocate
+ * we need to acquire them from previous execbuf. Function returns previous
+ * object offset for @handle or 0 if object is not found.
+ */
+uint64_t intel_bb_get_object_offset(struct intel_bb *ibb, uint32_t handle)
+{
+ struct drm_i915_gem_exec_object2 object = { .handle = handle };
+ struct drm_i915_gem_exec_object2 **found;
+
+ igt_assert(ibb);
+
+ found = tfind((void *) &object, &ibb->root, __compare_objects);
+ if (!found)
+ return 0;
+
+ return (*found)->offset;
+}