// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef __INTEL_ALLOCATOR_H__
#define __INTEL_ALLOCATOR_H__

#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <stdatomic.h>

#include "i915/gem_submission.h"

/**
 * SECTION:intel_allocator
 * @short_description: igt implementation of allocator
 * @title: Intel allocator
 * @include: intel_allocator.h
 *
 * # Introduction
 *
 * With the era of discrete cards we were required to adapt IGT to handle
 * addresses in userspace only (softpin, without support for relocations).
 * Writing an allocator for a single purpose would be relatively easy,
 * but supporting different tests with different requirements is a fairly
 * complicated task and a couple of scenarios may not be covered yet.
 *
 * # Assumptions
 *
 * - The allocator has to work in a multiprocess / multithread environment.
 * - The allocator backend (algorithm) should be pluggable. Currently we
 *   support SIMPLE (borrowed from the Mesa allocator), RELOC (a pseudo
 *   allocator which returns incrementing addresses without checking for
 *   overlaps) and RANDOM (a pseudo allocator which randomizes addresses
 *   without checking for overlaps).
 * - It has to integrate with intel-bb (our simpler libdrm replacement used
 *   in a couple of tests).
 *
 * # Implementation
 *
 * ## Single process (allows multiple threads)
 *
 * For a single process we don't need to create a dedicated entity (a kind
 * of arbiter) to resolve allocations. Simple locking over the allocator
 * data structure is enough. A basic usage example would be:
 *
 * |[
 * struct object {
 *	uint32_t handle;
 *	uint64_t offset;
 *	uint64_t size;
 * };
 *
 * struct object obj1, obj2;
 * uint64_t ahnd, startp, endp, size = 4096, align = 1 << 13;
 * int fd = -1;
 *
 * fd = drm_open_driver(DRIVER_INTEL);
 * ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *
 * obj1.handle = gem_create(fd, 4096);
 * obj2.handle = gem_create(fd, 4096);
 *
 * // Reserve a hole for an object at a given address.
 * // In this example, the first possible address.
 * intel_allocator_get_address_range(ahnd, &startp, &endp);
 * obj1.offset = startp;
 * igt_assert(intel_allocator_reserve(ahnd, obj1.handle, size, startp));
 *
 * // Get the most suitable offset for the object. Preferred way.
 * obj2.offset = intel_allocator_alloc(ahnd, obj2.handle, size, align);
 *
 * ...
 *
 * // Reserved addresses can only be freed by unreserve.
 * intel_allocator_unreserve(ahnd, obj1.handle, size, obj1.offset);
 * intel_allocator_free(ahnd, obj2.handle);
 *
 * gem_close(fd, obj1.handle);
 * gem_close(fd, obj2.handle);
 * ]|
 *
 * Description:
 * - ahnd is the allocator handle (the vm space handled by it)
 * - we call get_address_range() to get the start/end range provided by the
 *   allocator (we haven't specified a range in open, so the allocator code
 *   will assume some safe address range - we don't want to exercise
 *   potential HW bugs on the last page)
 * - the alloc() / free() pair just gets an address for a gem object,
 *   proposed by the allocator
 * - the reserve() / unreserve() pair gives us full control to acquire /
 *   return the exact range we're interested in
 *
 * ## Multiple processes
 *
 * When a process forks and its child uses the same fd, the vm address
 * space is also the same. Some coordination - in this case interprocess
 * communication - is required to assign proper addresses for gem objects
 * and avoid collisions. An additional thread is spawned for this case to
 * cover the child processes' needs. It uses some form of communication
 * channel to receive requests, perform the action (alloc, free, ...) and
 * send a response to the requesting process. Currently a SYSVIPC message
 * queue is used for this, but it can be replaced by another mechanism.
 * Allocation techniques are the same as for a single process, we just
 * need to wrap such code with:
 *
 * |[
 * intel_allocator_multiprocess_start();
 *
 * ... allocation code (open, close, alloc, free, ...)
 *
 * intel_allocator_multiprocess_stop();
 * ]|
 *
 * Calling start() spawns the additional allocator thread, ready to handle
 * incoming allocation requests (open / close are also requests in that
 * case).
 *
 * Calling stop() requests the allocator thread to stop, unblocking all
 * pending children (if any).
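 *
 * As an illustration, a forked scenario might look roughly like the sketch
 * below. It assumes the igt_fork() / igt_waitchildren() helpers from
 * igt_core.h, an fd opened as above and an alignment of 0 (the allocator's
 * default); every child opens a handle to the same vm, so the allocator
 * thread in the parent arbitrates between them:
 *
 * |[
 * intel_allocator_multiprocess_start();
 *
 * igt_fork(child, 4) {
 *	uint64_t ahnd, offset;
 *	uint32_t handle;
 *
 *	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *	handle = gem_create(fd, 4096);
 *	offset = intel_allocator_alloc(ahnd, handle, 4096, 0);
 *
 *	... submit work using handle at offset ...
 *
 *	intel_allocator_free(ahnd, handle);
 *	gem_close(fd, handle);
 *	intel_allocator_close(ahnd);
 * }
 * igt_waitchildren();
 *
 * intel_allocator_multiprocess_stop();
 * ]|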
 */

enum allocator_strategy {
	ALLOC_STRATEGY_NONE,
	ALLOC_STRATEGY_LOW_TO_HIGH,
	ALLOC_STRATEGY_HIGH_TO_LOW
};

struct intel_allocator {
	int fd;
	uint8_t type;
	enum allocator_strategy strategy;
	uint64_t default_alignment;
	_Atomic(int32_t) refcount;
	pthread_mutex_t mutex;

	/* allocator's private structure */
	void *priv;

	void (*get_address_range)(struct intel_allocator *ial,
				  uint64_t *startp, uint64_t *endp);
	uint64_t (*alloc)(struct intel_allocator *ial, uint32_t handle,
			  uint64_t size, uint64_t alignment,
			  enum allocator_strategy strategy);
	bool (*is_allocated)(struct intel_allocator *ial, uint32_t handle,
			     uint64_t size, uint64_t offset);
	bool (*reserve)(struct intel_allocator *ial, uint32_t handle,
			uint64_t start, uint64_t end);
	bool (*unreserve)(struct intel_allocator *ial, uint32_t handle,
			  uint64_t start, uint64_t end);
	bool (*is_reserved)(struct intel_allocator *ial,
			    uint64_t start, uint64_t end);
	bool (*free)(struct intel_allocator *ial, uint32_t handle);

	void (*destroy)(struct intel_allocator *ial);

	bool (*is_empty)(struct intel_allocator *ial);

	void (*print)(struct intel_allocator *ial, bool full);
};

void intel_allocator_init(void);
void __intel_allocator_multiprocess_prepare(void);
void __intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_stop(void);

uint64_t intel_allocator_open(int fd, uint32_t ctx, uint8_t allocator_type);
uint64_t intel_allocator_open_full(int fd, uint32_t ctx,
				   uint64_t start, uint64_t end,
				   uint8_t allocator_type,
				   enum allocator_strategy strategy,
				   uint64_t default_alignment);
uint64_t intel_allocator_open_vm(int fd, uint32_t vm, uint8_t allocator_type);
uint64_t intel_allocator_open_vm_full(int fd, uint32_t vm,
				      uint64_t start, uint64_t end,
				      uint8_t allocator_type,
				      enum allocator_strategy strategy,
				      uint64_t default_alignment);

uint64_t intel_allocator_open_vm_as(uint64_t allocator_handle, uint32_t new_vm);
bool intel_allocator_close(uint64_t allocator_handle);
void intel_allocator_get_address_range(uint64_t allocator_handle,
				       uint64_t *startp, uint64_t *endp);
uint64_t __intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
				 uint64_t size, uint64_t alignment,
				 enum allocator_strategy strategy);
uint64_t intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
			       uint64_t size, uint64_t alignment);
uint64_t intel_allocator_alloc_with_strategy(uint64_t allocator_handle,
					     uint32_t handle,
					     uint64_t size, uint64_t alignment,
					     enum allocator_strategy strategy);
bool intel_allocator_free(uint64_t allocator_handle, uint32_t handle);
bool intel_allocator_is_allocated(uint64_t allocator_handle, uint32_t handle,
				  uint64_t size, uint64_t offset);
bool intel_allocator_reserve(uint64_t allocator_handle, uint32_t handle,
			     uint64_t size, uint64_t offset);
bool intel_allocator_unreserve(uint64_t allocator_handle, uint32_t handle,
			       uint64_t size, uint64_t offset);
bool intel_allocator_is_reserved(uint64_t allocator_handle,
				 uint64_t size, uint64_t offset);
bool intel_allocator_reserve_if_not_allocated(uint64_t allocator_handle,
					      uint32_t handle,
					      uint64_t size, uint64_t offset,
					      bool *is_allocatedp);
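/*
 * A sketch of intel_allocator_reserve_if_not_allocated() usage, assuming
 * ahnd, handle, size and offset were obtained as in the examples above:
 *
 *	bool allocated;
 *
 *	if (intel_allocator_reserve_if_not_allocated(ahnd, handle, size,
 *						     offset, &allocated))
 *		; // the range was free and is now reserved for handle
 *	else if (allocated)
 *		; // handle already has an address tracked by the allocator
 */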
void intel_allocator_print(uint64_t allocator_handle);

#define ALLOC_INVALID_ADDRESS (-1ull)
#define INTEL_ALLOCATOR_NONE   0
#define INTEL_ALLOCATOR_RELOC  1
#define INTEL_ALLOCATOR_RANDOM 2
#define INTEL_ALLOCATOR_SIMPLE 3

#define GEN8_GTT_ADDRESS_WIDTH 48

static inline uint64_t sign_extend64(uint64_t x, int high)
{
	int shift = 63 - high;

	return (int64_t)(x << shift) >> shift;
}

/* Convert an offset to its canonical (sign-extended) 48-bit form. */
static inline uint64_t CANONICAL(uint64_t offset)
{
	return sign_extend64(offset, GEN8_GTT_ADDRESS_WIDTH - 1);
}

/* Strip the sign extension, keeping only the low 48 address bits. */
#define DECANONICAL(offset) (offset & ((1ull << GEN8_GTT_ADDRESS_WIDTH) - 1))

/*
 * Helpers which return 0 when the driver still supports relocations
 * (callers treat ahnd == 0 as "use relocations"), otherwise they open
 * an allocator for @ctx.
 */
static inline uint64_t get_simple_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open(fd, ctx,
						    INTEL_ALLOCATOR_SIMPLE);
}

static inline uint64_t get_simple_l2h_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
							 INTEL_ALLOCATOR_SIMPLE,
							 ALLOC_STRATEGY_LOW_TO_HIGH,
							 0);
}

static inline uint64_t get_simple_h2l_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
							 INTEL_ALLOCATOR_SIMPLE,
							 ALLOC_STRATEGY_HIGH_TO_LOW,
							 0);
}

static inline uint64_t get_reloc_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open(fd, ctx,
						    INTEL_ALLOCATOR_RELOC);
}

static inline bool put_ahnd(uint64_t ahnd)
{
	return !ahnd || intel_allocator_close(ahnd);
}

static inline uint64_t get_offset(uint64_t ahnd, uint32_t handle,
				  uint64_t size, uint64_t alignment)
{
	if (!ahnd)
		return 0;

	return intel_allocator_alloc(ahnd, handle, size, alignment);
}

static inline bool put_offset(uint64_t ahnd, uint32_t handle)
{
	if (!ahnd)
		return false;

	return intel_allocator_free(ahnd, handle);
}

#endif /* __INTEL_ALLOCATOR_H__ */