// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef __INTEL_ALLOCATOR_H__
#define __INTEL_ALLOCATOR_H__

#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <stdatomic.h>
#include "i915/gem_submission.h"

/**
 * SECTION:intel_allocator
 * @short_description: IGT implementation of an allocator
 * @title: Intel allocator
 * @include: intel_allocator.h
 *
 * # Introduction
 *
 * With the advent of discrete cards we needed to adapt IGT to handle
 * addresses in userspace only (softpin, without relocation support).
 * Writing an allocator for a single purpose would be relatively easy,
 * but supporting different tests with different requirements turned out
 * to be quite a complicated task, and a couple of scenarios may not be
 * covered yet.
 *
 * # Assumptions
 *
 * - The allocator has to work in a multiprocess / multithread environment.
 * - The allocator backend (algorithm) should be pluggable. Currently we
 *   support SIMPLE (borrowed from the Mesa allocator), RELOC (a pseudo
 *   allocator which returns incrementing addresses without checking for
 *   overlaps) and RANDOM (a pseudo allocator which returns randomized
 *   addresses without checking for overlaps).
 * - It has to integrate with intel-bb (our simpler libdrm replacement
 *   used in a couple of tests).
 *
 * # Implementation
 *
 * ## Single process (allows multiple threads)
 *
 * For a single process we don't need to create a dedicated entity
 * (a kind of arbiter) to resolve allocations. Simple locking over the
 * allocator data structure is enough. A basic usage example:
 *
 * |[<!-- language="c" -->
 * struct object {
 *      uint32_t handle;
 *      uint64_t offset;
 *      uint64_t size;
 * };
 *
 * struct object obj1, obj2;
 * uint64_t ahnd, startp, endp, size = 4096, align = 1 << 13;
 * int fd = -1;
 *
 * fd = drm_open_driver(DRIVER_INTEL);
 * ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *
 * obj1.handle = gem_create(fd, 4096);
 * obj2.handle = gem_create(fd, 4096);
 *
 * // Reserve a hole for an object at a given address.
 * // In this example, the first possible address.
 * intel_allocator_get_address_range(ahnd, &startp, &endp);
 * obj1.offset = startp;
 * igt_assert(intel_allocator_reserve(ahnd, obj1.handle, size, startp));
 *
 * // Get the most suitable offset for the object. Preferred way.
 * obj2.offset = intel_allocator_alloc(ahnd, obj2.handle, size, align);
 *
 *  ...
 *
 * // Reserved addresses can only be freed by unreserve.
 * intel_allocator_unreserve(ahnd, obj1.handle, size, obj1.offset);
 * intel_allocator_free(ahnd, obj2.handle);
 *
 * gem_close(fd, obj1.handle);
 * gem_close(fd, obj2.handle);
 *
 * intel_allocator_close(ahnd);
 * ]|
 *
 * Description:
 * - ahnd is the allocator handle (for the vm space handled by it)
 * - we call get_address_range() to get the start/end of the range
 *   provided by the allocator (we haven't specified a range in open(),
 *   so the allocator code will assume some safe address range; we don't
 *   want to exercise potential HW bugs on the last page)
 * - the alloc() / free() pair just gets an address for a gem object as
 *   proposed by the allocator
 * - the reserve() / unreserve() pair gives us full control over
 *   acquiring / returning the range we're interested in
 *
 * ## Multiple processes
 *
 * When a process forks and its child uses the same fd, the vm address
 * space is also the same. Some coordination (in this case interprocess
 * communication) is required to assign proper addresses to gem objects
 * and avoid collisions. An additional thread is spawned for such cases
 * to serve the child processes' needs. It uses a communication channel
 * to receive requests, perform the action (alloc, free, ...) and send
 * a response back to the requesting process. Currently a SYSV IPC
 * message queue is used for this, but it could be replaced by another
 * mechanism. Allocation techniques are the same as for a single
 * process; we just need to wrap the code with:
 *
 * |[<!-- language="c" -->
 * intel_allocator_multiprocess_start();
 *
 * ... allocation code (open, close, alloc, free, ...)
 *
 * intel_allocator_multiprocess_stop();
 * ]|
 *
 * Calling start() spawns an additional allocator thread ready to handle
 * incoming allocation requests (open / close are also requests in this
 * case).
 *
 * Calling stop() requests the allocator thread to stop, unblocking all
 * pending children (if any).
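 *
 * A minimal sketch of the multiprocess flow, assuming a drm fd opened
 * earlier and using IGT's igt_fork() / igt_waitchildren() helpers (the
 * buffer size and child count are illustrative only):
 *
 * |[<!-- language="c" -->
 * intel_allocator_multiprocess_start();
 *
 * igt_fork(child, 2) {
 *      uint64_t ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *      uint32_t handle = gem_create(fd, 4096);
 *      uint64_t offset;
 *
 *      // Requests from children are serviced by the allocator thread,
 *      // so offsets handed out here won't collide between processes.
 *      offset = intel_allocator_alloc(ahnd, handle, 4096, 0);
 *
 *      intel_allocator_free(ahnd, handle);
 *      gem_close(fd, handle);
 *      intel_allocator_close(ahnd);
 * }
 * igt_waitchildren();
 *
 * intel_allocator_multiprocess_stop();
 * ]|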
 */

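/*
 * Preferred direction in which an allocator hands out addresses (see
 * the get_simple_l2h_ahnd() / get_simple_h2l_ahnd() helpers below).
 */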
enum allocator_strategy {
	ALLOC_STRATEGY_NONE,
	ALLOC_STRATEGY_LOW_TO_HIGH,
	ALLOC_STRATEGY_HIGH_TO_LOW
};

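/*
 * Interface every allocator backend (SIMPLE, RELOC, RANDOM) implements;
 * callers normally use the intel_allocator_*() wrappers below instead
 * of invoking these hooks directly.
 */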
struct intel_allocator {
	int fd;
	uint8_t type;
	enum allocator_strategy strategy;
	uint64_t default_alignment;
	_Atomic(int32_t) refcount;
	pthread_mutex_t mutex;

	/* allocator's private structure */
	void *priv;

	void (*get_address_range)(struct intel_allocator *ial,
				  uint64_t *startp, uint64_t *endp);
	uint64_t (*alloc)(struct intel_allocator *ial, uint32_t handle,
			  uint64_t size, uint64_t alignment,
			  enum allocator_strategy strategy);
	bool (*is_allocated)(struct intel_allocator *ial, uint32_t handle,
			     uint64_t size, uint64_t offset);
	bool (*reserve)(struct intel_allocator *ial,
			uint32_t handle, uint64_t start, uint64_t end);
	bool (*unreserve)(struct intel_allocator *ial,
			  uint32_t handle, uint64_t start, uint64_t end);
	bool (*is_reserved)(struct intel_allocator *ial,
			    uint64_t start, uint64_t end);
	bool (*free)(struct intel_allocator *ial, uint32_t handle);

	void (*destroy)(struct intel_allocator *ial);

	bool (*is_empty)(struct intel_allocator *ial);

	void (*print)(struct intel_allocator *ial, bool full);
};

void intel_allocator_init(void);
void __intel_allocator_multiprocess_prepare(void);
void __intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_stop(void);

uint64_t intel_allocator_open(int fd, uint32_t ctx, uint8_t allocator_type);
uint64_t intel_allocator_open_full(int fd, uint32_t ctx,
				   uint64_t start, uint64_t end,
				   uint8_t allocator_type,
				   enum allocator_strategy strategy,
				   uint64_t default_alignment);
uint64_t intel_allocator_open_vm(int fd, uint32_t vm, uint8_t allocator_type);
uint64_t intel_allocator_open_vm_full(int fd, uint32_t vm,
				      uint64_t start, uint64_t end,
				      uint8_t allocator_type,
				      enum allocator_strategy strategy,
				      uint64_t default_alignment);
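
/*
 * Example of the _full() variants (hypothetical range): open a SIMPLE
 * allocator over the first 4 GiB of the vm, handing out addresses from
 * the top down with the default alignment:
 *
 *   ahnd = intel_allocator_open_full(fd, 0, 0, 1ull << 32,
 *                                    INTEL_ALLOCATOR_SIMPLE,
 *                                    ALLOC_STRATEGY_HIGH_TO_LOW, 0);
 */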

uint64_t intel_allocator_open_vm_as(uint64_t allocator_handle, uint32_t new_vm);
bool intel_allocator_close(uint64_t allocator_handle);
void intel_allocator_get_address_range(uint64_t allocator_handle,
				       uint64_t *startp, uint64_t *endp);
uint64_t __intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
				 uint64_t size, uint64_t alignment,
				 enum allocator_strategy strategy);
uint64_t intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
			       uint64_t size, uint64_t alignment);
uint64_t intel_allocator_alloc_with_strategy(uint64_t allocator_handle,
					     uint32_t handle,
					     uint64_t size, uint64_t alignment,
					     enum allocator_strategy strategy);
bool intel_allocator_free(uint64_t allocator_handle, uint32_t handle);
bool intel_allocator_is_allocated(uint64_t allocator_handle, uint32_t handle,
				  uint64_t size, uint64_t offset);
bool intel_allocator_reserve(uint64_t allocator_handle, uint32_t handle,
			     uint64_t size, uint64_t offset);
bool intel_allocator_unreserve(uint64_t allocator_handle, uint32_t handle,
			       uint64_t size, uint64_t offset);
bool intel_allocator_is_reserved(uint64_t allocator_handle,
				 uint64_t size, uint64_t offset);
bool intel_allocator_reserve_if_not_allocated(uint64_t allocator_handle,
					      uint32_t handle,
					      uint64_t size, uint64_t offset,
					      bool *is_allocatedp);

void intel_allocator_print(uint64_t allocator_handle);

#define ALLOC_INVALID_ADDRESS (-1ull)
#define INTEL_ALLOCATOR_NONE   0
#define INTEL_ALLOCATOR_RELOC  1 /* pseudo allocator, incrementing addresses */
#define INTEL_ALLOCATOR_RANDOM 2 /* pseudo allocator, randomized addresses */
#define INTEL_ALLOCATOR_SIMPLE 3 /* real allocator, borrowed from Mesa */

#define GEN8_GTT_ADDRESS_WIDTH 48

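/*
 * Gen8+ ppGTT offsets are 48 bits wide and, like CPU virtual addresses,
 * must be passed to the kernel in canonical form, i.e. with bits
 * [63:48] equal to bit 47. CANONICAL() sign-extends an offset
 * accordingly; DECANONICAL() masks it back to the 48-bit range.
 */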
static inline uint64_t sign_extend64(uint64_t x, int high)
{
	int shift = 63 - high;

	return (int64_t)(x << shift) >> shift;
}

static inline uint64_t CANONICAL(uint64_t offset)
{
	return sign_extend64(offset, GEN8_GTT_ADDRESS_WIDTH - 1);
}

#define DECANONICAL(offset) (offset & ((1ull << GEN8_GTT_ADDRESS_WIDTH) - 1))

static inline uint64_t get_simple_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_SIMPLE);
}

static inline uint64_t get_simple_l2h_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
							 INTEL_ALLOCATOR_SIMPLE,
							 ALLOC_STRATEGY_LOW_TO_HIGH,
							 0);
}

static inline uint64_t get_simple_h2l_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
							 INTEL_ALLOCATOR_SIMPLE,
							 ALLOC_STRATEGY_HIGH_TO_LOW,
							 0);
}

static inline uint64_t get_reloc_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_RELOC);
}

static inline bool put_ahnd(uint64_t ahnd)
{
	return !ahnd || intel_allocator_close(ahnd);
}

static inline uint64_t get_offset(uint64_t ahnd, uint32_t handle,
				  uint64_t size, uint64_t alignment)
{
	if (!ahnd)
		return 0;

	return intel_allocator_alloc(ahnd, handle, size, alignment);
}

static inline bool put_offset(uint64_t ahnd, uint32_t handle)
{
	if (!ahnd)
		return false;

	return intel_allocator_free(ahnd, handle);
}
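
/*
 * Typical usage sketch: the get_*_ahnd() helpers above return 0 when
 * the kernel still supports relocations, so a test can handle both the
 * softpin and the relocation paths with the same code:
 *
 *   uint64_t ahnd = get_reloc_ahnd(fd, 0);
 *   uint64_t offset = get_offset(ahnd, handle, 4096, 0);
 *
 *   ... pin the object at offset when ahnd is non-zero, otherwise
 *   fall back to relocations ...
 *
 *   put_offset(ahnd, handle);
 *   put_ahnd(ahnd);
 */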

#endif