4 files changed, 278 insertions, 0 deletions
diff --git a/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml b/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml
index 0dc5a0b7..c22e70b7 100644
--- a/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml
+++ b/docs/reference/igt-gpu-tools/igt-gpu-tools-docs.xml
@@ -56,6 +56,7 @@
   </chapter>
   <chapter>
     <title>igt/i915 API Reference</title>
+    <xi:include href="xml/gem_create.xml"/>
     <xi:include href="xml/gem_context.xml"/>
     <xi:include href="xml/gem_engine_topology.xml"/>
     <xi:include href="xml/gem_scheduler.xml"/>
diff --git a/lib/i915/gem_create.c b/lib/i915/gem_create.c
index b2e8d559..605c4513 100644
--- a/lib/i915/gem_create.c
+++ b/lib/i915/gem_create.c
@@ -4,12 +4,25 @@
  */
 
 #include <errno.h>
+#include <pthread.h>
 
+#include "drmtest.h"
 #include "gem_create.h"
 #include "i915_drm.h"
 #include "igt_core.h"
+#include "igt_list.h"
+#include "igt_map.h"
 #include "ioctl_wrappers.h"
 
+/**
+ * SECTION:gem_create
+ * @short_description: Helpers for dealing with objects creation
+ * @title: GEM Create
+ *
+ * This helper library contains functions used for handling creating gem
+ * objects.
+ */
+
 int __gem_create(int fd, uint64_t *size, uint32_t *handle)
 {
 	struct drm_i915_gem_create create = {
@@ -88,3 +101,261 @@ uint32_t gem_create_ext(int fd, uint64_t size, struct i915_user_extension *ext)
 
 	return handle;
 }
+
+static struct igt_map *pool;
+static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+struct pool_entry {
+	int fd;
+	uint32_t handle;
+	uint64_t size;		/* requested bo size */
+	uint64_t bo_size;	/* created bo size */
+	uint32_t region;
+	struct igt_list_head link;
+};
+
+struct pool_list {
+	uint64_t size;
+	struct igt_list_head list;
+};
+
+static struct pool_entry *find_or_create(int fd, struct pool_list *pl,
+					 uint64_t size, uint32_t region)
+{
+	struct pool_entry *pe;
+	bool found = false;
+
+	igt_list_for_each_entry(pe, &pl->list, link) {
+		if (pe->fd == fd && pe->size == size && pe->region == region &&
+		    !gem_bo_busy(fd, pe->handle)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pe = calloc(1, sizeof(*pe));
+		if (!pe)
+			goto out;
+
+		pe->fd = fd;
+		pe->bo_size = size;
+		if (__gem_create_in_memory_regions(fd, &pe->handle, &pe->bo_size, region)) {
+			free(pe);
+			pe = NULL;
+			goto out;
+		}
+		pe->size = size;
+		pe->region = region;
+
+		igt_list_add_tail(&pe->link, &pl->list);
+	}
+
+out:
+	return pe;
+}
+
+/**
+ * gem_create_from_pool:
+ * @fd: open i915 drm file descriptor
+ * @size: pointer to size, on input it points to requested bo size,
+ * on output created bo size will be stored there
+ * @region: region in which bo should be created
+ *
+ * Function returns bo handle which is free to use (not busy). Internally
+ * it iterates over previously allocated bo and returns first free. If there
+ * are no free bo a new one is created.
+ *
+ * Returns: bo handle + created bo size (via pointer to size)
+ */
+uint32_t gem_create_from_pool(int fd, uint64_t *size, uint32_t region)
+{
+	struct pool_list *pl;
+	struct pool_entry *pe;
+
+	pthread_mutex_lock(&pool_mutex);
+
+	pl = igt_map_search(pool, size);
+	if (!pl) {
+		pl = calloc(1, sizeof(*pl));
+		if (!pl)
+			goto out;
+
+		IGT_INIT_LIST_HEAD(&pl->list);
+		pl->size = *size;
+		igt_map_insert(pool, &pl->size, pl);
+	}
+	pe = find_or_create(fd, pl, *size, region);
+
+out:
+	pthread_mutex_unlock(&pool_mutex);
+
+	igt_assert(pl && pe);
+
+	return pe->handle;
+}
+
+static void __pool_list_free_func(struct igt_map_entry *entry)
+{
+	free(entry->data);
+}
+
+static void __destroy_pool(struct igt_map *map, pthread_mutex_t *mutex)
+{
+	struct igt_map_entry *pos;
+	const struct pool_list *pl;
+	struct pool_entry *pe, *tmp;
+
+	if (!map)
+		return;
+
+	pthread_mutex_lock(mutex);
+
+	igt_map_foreach(map, pos) {
+		pl = pos->key;
+		igt_list_for_each_entry_safe(pe, tmp, &pl->list, link) {
+			gem_close(pe->fd, pe->handle);
+			igt_list_del(&pe->link);
+			free(pe);
+		}
+	}
+
+	pthread_mutex_unlock(mutex);
+
+	igt_map_destroy(map, __pool_list_free_func);
+}
+
+void gem_pool_dump(void)
+{
+	struct igt_map_entry *pos;
+	const struct pool_list *pl;
+	struct pool_entry *pe;
+
+	if (!pool)
+		return;
+
+	pthread_mutex_lock(&pool_mutex);
+
+	igt_debug("[pool]\n");
+	igt_map_foreach(pool, pos) {
+		pl = pos->key;
+		igt_debug("bucket [%llx]\n", (long long) pl->size);
+		igt_list_for_each_entry(pe, &pl->list, link)
+			igt_debug(" - handle: %u, size: %llx, bo_size: %llx, region: %x\n",
+				  pe->handle, (long long) pe->size,
+				  (long long) pe->bo_size, pe->region);
+	}
+
+	pthread_mutex_unlock(&pool_mutex);
+}
+
+#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
+static inline uint32_t hash_pool(const void *val)
+{
+	uint64_t hash = *(uint64_t *) val;
+
+	hash = hash * GOLDEN_RATIO_PRIME_64;
+	return hash >> 32;
+}
+
+static int equal_pool(const void *a, const void *b)
+{
+	struct pool_list *p1 = (struct pool_list *) a;
+	struct pool_list *p2 = (struct pool_list *) b;
+
+	return p1->size == p2->size;
+}
+
+/**
+ * gem_pool_init:
+ *
+ * Function initializes bo pool (kind of bo cache). Main purpose of it is to
+ * support working with softpin to achieve pipelined execution on gpu (without
+ * stalls).
+ *
+ * For example imagine code as follows:
+ *
+ * |[<!-- language="C" -->
+ * uint32_t bb = gem_create(fd, 4096);
+ * uint32_t *bbptr = gem_mmap__device_coherent(fd, bb, ...)
+ * uint32_t *cmd = bbptr;
+ * ...
+ * *cmd++ = ...gpu commands...
+ * ...
+ * *cmd++ = MI_BATCH_BUFFER_END;
+ * ...
+ * gem_execbuf(fd, execbuf); // bb is part of execbuf   <--- first execbuf
+ *
+ * cmd = bbptr;
+ * ...
+ * *cmd++ = ... next gpu commands...
+ * ...
+ * *cmd++ = MI_BATCH_BUFFER_END;
+ * ...
+ * gem_execbuf(fd, execbuf); // bb is part of execbuf   <--- second execbuf
+ * ]|
+ *
+ * Above code is prone to gpu hang because when bb was submitted to gpu
+ * we immediately started writing to it. If gpu started executing commands
+ * from first execbuf we're overwriting it leading to unpredicted behavior
+ * (partially execute from first and second commands or we get gpu hang).
+ * To avoid this we can sync after first execbuf but we will get stall
+ * in execution. For some tests it might be accepted but such "isolated"
+ * execution hides bugs (synchronization, cache flushes, etc).
+ *
+ * So, to achive pipelined execution we need to use another bb. If we would
+ * like to enqueue more work which is serialized we would need more bbs
+ * (depends on execution speed). Handling this manually is cumbersome as
+ * we need to track all bb and their status (busy or free).
+ *
+ * Solution to above is gem pool. It returns first handle of requested size
+ * which is not busy (or create a new one if there's none or all of bo are
+ * in use). Here's an example how to use it:
+ *
+ * |[<!-- language="C" -->
+ * uint64_t bbsize = 4096;
+ * uint32_t bb = gem_create_from_pool(fd, &bbsize, REGION_SMEM);
+ * uint32_t *bbptr = gem_mmap__device_coherent(fd, bb, ...)
+ * uint32_t *cmd = bbptr;
+ * ...
+ * *cmd++ = ...gpu commands...
+ * ...
+ * *cmd++ = MI_BATCH_BUFFER_END;
+ * gem_munmap(bbptr, bbsize);
+ * ...
+ * gem_execbuf(fd, execbuf); // bb is part of execbuf   <--- first execbuf
+ *
+ * bbsize = 4096;
+ * bb = gem_create_from_pool(fd, &bbsize, REGION_SMEM);
+ * cmd = bbptr;
+ * ...
+ * *cmd++ = ... next gpu commands...
+ * ...
+ * *cmd++ = MI_BATCH_BUFFER_END;
+ * gem_munmap(bbptr, bbsize);
+ * ...
+ * gem_execbuf(fd, execbuf); // bb is part of execbuf   <--- second execbuf
+ * ]|
+ *
+ * Assuming first execbuf is executed we will get new bb handle when we call
+ * gem_create_from_pool(). When test completes pool is freed automatically
+ * in igt core (all handles will be closed, memory will be freed and gem pool
+ * will be reinitialized for next test).
+ *
+ * Some explanation is needed why we need to put pointer to size instead of
+ * passing absolute value. On discrete regarding memory placement (region)
+ * object created in the memory can be bigger than requested. Especially when
+ * we use allocator to handle vm space and we allocate vma with requested
+ * size (which is smaller than bo created) we can overlap with next allocation
+ * and get -ENOSPC.
+ */
+void gem_pool_init(void)
+{
+	pthread_mutex_init(&pool_mutex, NULL);
+	__destroy_pool(pool, &pool_mutex);
+	pool = igt_map_create(hash_pool, equal_pool);
+}
+
+igt_constructor {
+	gem_pool_init();
+}
diff --git a/lib/i915/gem_create.h b/lib/i915/gem_create.h
index c2b531b4..c32a815d 100644
--- a/lib/i915/gem_create.h
+++ b/lib/i915/gem_create.h
@@ -16,4 +16,8 @@ int __gem_create_ext(int fd, uint64_t *size, uint32_t *handle,
                      struct i915_user_extension *ext);
 uint32_t gem_create_ext(int fd, uint64_t size, struct i915_user_extension *ext);
 
+void gem_pool_init(void);
+void gem_pool_dump(void);
+uint32_t gem_create_from_pool(int fd, uint64_t *size, uint32_t region);
+
 #endif /* GEM_CREATE_H */
diff --git a/lib/igt_core.c b/lib/igt_core.c
index f2c701de..6dad3c84 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -58,6 +58,7 @@
 #include <glib.h>
 
 #include "drmtest.h"
+#include "i915/gem_create.h"
 #include "intel_allocator.h"
 #include "intel_batchbuffer.h"
 #include "intel_chipset.h"
@@ -1428,6 +1429,7 @@ __noreturn static void exit_subtest(const char *result)
 	 */
 	intel_allocator_init();
 	intel_bb_reinit_allocator();
+	gem_pool_init();
 
 	if (!in_dynamic_subtest)
 		_igt_dynamic_tests_executed = -1;