summaryrefslogtreecommitdiff
path: root/lib/igt_fb.c
diff options
context:
space:
mode:
authorLyude Paul <lyude@redhat.com>2021-03-24 18:36:06 -0400
committerLyude Paul <lyude@redhat.com>2021-03-26 12:29:51 -0400
commit3887134e739f480cefe1dc7f13eb54f7bf3ca27f (patch)
tree8934b963f474cec6a50f741cf4c2563d00f38ff0 /lib/igt_fb.c
parente76039273b1524147c43dba061756f06003d56ae (diff)
lib: Introduce the igt_nouveau library
This introduces the igt_nouveau library, which enables support for tiling formats on nouveau, along with accelerated clears for allocated bos in VRAM using the dma-copy engine present on Nvidia hardware since Tesla. Typically the latter would be handled by the kernel automatically, which is the long-term plan for nouveau, but since the kernel doesn't yet support that we implement this in igt in order to fulfill the expectation that most of igt has in which newly allocated fbs are expected to be zero-filled by default. The dma-copy engine is capable of fast blitting, and is also able to perform tiling/untiling at the same time. This is worth mentioning because unlike many of the other drivers supported in igt, we go out of our way to avoid using mmap() in order to perform CPU rendering wherever possible. Instead of mmap()ing an fb that we want to draw to on the CPU (whether it be for converting formats, or just normal rendering), we instead use dma-copy to blit linear/tiled fbs over to linear system memory which we mmap() instead. This is primarily because while mmap() is typically painfully slow for vram, it's even slower on nouveau due to the current lack of dynamic reclocking in our driver. Furthermore, using the dma-copy engine for copying things over to system ram is also dramatically faster than using igt's memcpy wc helpers even when no tiling is involved. Such speed improvements are both quite nice, but also very necessary for certain tests like kms_plane that are rather sensitive when it comes to slow rendering with drivers. This doesn't mean we won't want to provide a way of using mmap() for rendering in the future however, as at least basic testing of mmap() is certainly something we eventually want for nouveau. However, I think the best way for us to do this in the future will be to adapt the igt_draw API to work with nouveau so we can explicitly request using mmap() in tests which need it. 
Finally, this code also adds a hard dependency on libdrm support for nouveau tests. The main reason for this is currently there are no real applications that use nouveau's ioctls directly (mesa, for instance, uses libdrm as well) and also that nouveau's ioctls are currently a bit complicated to use by hand. This will likely be temporary however, as Ben Skeggs is planning on revamping a lot of nouveau's APIs to simplify them and make libdrm support for nouveau obsolete in the future. Note that we take care to make sure that users can still disable libdrm support for nouveau if needed, with the only caveat being that any tests using igt_nouveau will be disabled, along with any tiling support for nvidia-specific tiling formats. This should enable igt tests which test tiling formats to run on nouveau, and fix some seemingly random test failures as a result of not having zero-filled buffers in a few other tests like kms_cursor_crc. Changes since v1: * Remove leftover rebase detritus in drm_fourcc.h Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Martin Peres <martin.peres@mupuf.org> Cc: Ben Skeggs <bskeggs@redhat.com> Cc: Jeremy Cline <jcline@redhat.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Diffstat (limited to 'lib/igt_fb.c')
-rw-r--r--lib/igt_fb.c98
1 file changed, 86 insertions, 12 deletions
diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index f0fcd1a7..4ded7e78 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -44,6 +44,7 @@
#include "igt_vc4.h"
#include "igt_amd.h"
#include "igt_x86.h"
+#include "igt_nouveau.h"
#include "ioctl_wrappers.h"
#include "intel_batchbuffer.h"
#include "intel_chipset.h"
@@ -479,6 +480,48 @@ void igt_get_fb_tile_size(int fd, uint64_t modifier, int fb_bpp,
*width_ret = 256;
*height_ret = vc4_modifier_param;
break;
+ case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0):
+ case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1):
+ case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2):
+ case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3):
+ case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4):
+ case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5):
+ modifier = drm_fourcc_canonicalize_nvidia_format_mod(modifier);
+ /* fallthrough */
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 0):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 1):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 2):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 3):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 4):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 5):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 0):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 1):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 2):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 3):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 4):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 5):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 0):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 1):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 2):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 3):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 4):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 5):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 0):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 1):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 2):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 3):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 4):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 5):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 0):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 1):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 2):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 3):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 4):
+ case DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 5):
+ igt_require_nouveau(fd);
+ *width_ret = 64;
+ *height_ret = igt_nouveau_get_block_height(modifier);
+ break;
default:
igt_assert(false);
}
@@ -688,6 +731,16 @@ static uint32_t calc_plane_stride(struct igt_fb *fb, int plane)
} else if (is_gen12_ccs_plane(fb, plane)) {
/* A main surface using a CCS AUX surface must be 4x4 tiles aligned. */
return ALIGN(min_stride, 64);
+ } else if (!fb->modifier && is_nouveau_device(fb->fd)) {
+ int align;
+
+ /* Volta supports 47-bit memory addresses, everything before only supports 40-bit */
+ if (igt_nouveau_get_chipset(fb->fd) >= IGT_NOUVEAU_CHIPSET_GV100)
+ align = 64;
+ else
+ align = 256;
+
+ return ALIGN(min_stride, align);
} else {
unsigned int tile_width, tile_height;
@@ -954,7 +1007,7 @@ static void clear_yuv_buffer(struct igt_fb *fb)
}
/* helpers to create nice-looking framebuffers */
-static int create_bo_for_fb(struct igt_fb *fb)
+static int create_bo_for_fb(struct igt_fb *fb, bool prefer_sysmem)
{
const struct format_desc_struct *fmt = lookup_drm_format(fb->drm_format);
unsigned int bpp = 0;
@@ -972,7 +1025,8 @@ static int create_bo_for_fb(struct igt_fb *fb)
if (fb->modifier || fb->size || fb->strides[0] ||
(is_i915_device(fd) && igt_format_is_yuv(fb->drm_format)) ||
(is_i915_device(fd) && igt_format_is_fp16(fb->drm_format)) ||
- (is_amdgpu_device(fd) && igt_format_is_yuv(fb->drm_format)))
+ (is_amdgpu_device(fd) && igt_format_is_yuv(fb->drm_format)) ||
+ is_nouveau_device(fd))
device_bo = true;
/* Sets offets and stride if necessary. */
@@ -1002,6 +1056,8 @@ static int create_bo_for_fb(struct igt_fb *fb)
fb->modifier);
} else if (is_amdgpu_device(fd)) {
fb->gem_handle = igt_amd_create_bo(fd, fb->size);
+ } else if (is_nouveau_device(fd)) {
+ fb->gem_handle = igt_nouveau_create_bo(fd, prefer_sysmem, fb);
} else {
igt_assert(false);
}
@@ -1044,7 +1100,7 @@ void igt_create_bo_for_fb(int fd, int width, int height,
{
igt_init_fb(fb, fd, width, height, format, modifier,
IGT_COLOR_YCBCR_BT709, IGT_COLOR_YCBCR_LIMITED_RANGE);
- create_bo_for_fb(fb);
+ create_bo_for_fb(fb, false);
}
/**
@@ -1078,7 +1134,7 @@ int igt_create_bo_with_dimensions(int fd, int width, int height,
for (int i = 0; i < fb.num_planes; i++)
fb.strides[i] = stride;
- create_bo_for_fb(&fb);
+ create_bo_for_fb(&fb, false);
if (size_ret)
*size_ret = fb.size;
@@ -1705,7 +1761,7 @@ igt_create_fb_with_bo_size(int fd, int width, int height,
__func__, width, height, IGT_FORMAT_ARGS(format), modifier,
bo_size);
- create_bo_for_fb(fb);
+ create_bo_for_fb(fb, false);
igt_assert(fb->gem_handle > 0);
igt_debug("%s(handle=%d, pitch=%d)\n",
@@ -2382,6 +2438,9 @@ static void free_linear_mapping(struct fb_blit_upload *blit)
igt_amd_fb_convert_plane_to_tiled(fb, map, &linear->fb, linear->map);
munmap(map, fb->size);
+ } else if (is_nouveau_device(fd)) {
+ igt_nouveau_fb_blit(fb, &linear->fb);
+ igt_nouveau_delete_bo(&linear->fb);
} else {
gem_munmap(linear->map, linear->fb.size);
gem_set_domain(fd, linear->fb.gem_handle,
@@ -2434,7 +2493,7 @@ static void setup_linear_mapping(struct fb_blit_upload *blit)
fb->drm_format, LOCAL_DRM_FORMAT_MOD_NONE,
fb->color_encoding, fb->color_range);
- create_bo_for_fb(&linear->fb);
+ create_bo_for_fb(&linear->fb, true);
igt_assert(linear->fb.gem_handle > 0);
@@ -2452,6 +2511,13 @@ static void setup_linear_mapping(struct fb_blit_upload *blit)
linear->map = igt_amd_mmap_bo(fd, linear->fb.gem_handle,
linear->fb.size,
PROT_READ | PROT_WRITE);
+ } else if (is_nouveau_device(fd)) {
+ /* Currently we also blit linear bos instead of mapping them as-is, as mmap() on
+ * nouveau is quite slow right now
+ */
+ igt_nouveau_fb_blit(&linear->fb, fb);
+
+ linear->map = igt_nouveau_mmap_bo(&linear->fb, PROT_READ | PROT_WRITE);
} else {
/* Copy fb content to linear BO */
gem_set_domain(fd, linear->fb.gem_handle,
@@ -2514,7 +2580,10 @@ int igt_dirty_fb(int fd, struct igt_fb *fb)
static void unmap_bo(struct igt_fb *fb, void *ptr)
{
- gem_munmap(ptr, fb->size);
+ if (is_nouveau_device(fb->fd))
+ igt_nouveau_munmap_bo(fb);
+ else
+ gem_munmap(ptr, fb->size);
if (fb->is_dumb)
igt_dirty_fb(fb->fd, fb);
@@ -2553,6 +2622,8 @@ static void *map_bo(int fd, struct igt_fb *fb)
else if (is_amdgpu_device(fd))
ptr = igt_amd_mmap_bo(fd, fb->gem_handle, fb->size,
PROT_READ | PROT_WRITE);
+ else if (is_nouveau_device(fd))
+ ptr = igt_nouveau_mmap_bo(fb, PROT_READ | PROT_WRITE);
else
igt_assert(false);
@@ -3569,13 +3640,13 @@ static void create_cairo_surface__convert(int fd, struct igt_fb *fb)
&blit->shadow_fb);
igt_assert(blit->shadow_ptr);
- if (use_enginecopy(fb) || use_blitter(fb) ||
- igt_vc4_is_tiled(fb->modifier)) {
+ /* Note for nouveau, it's currently faster to copy fbs to/from vram (even linear ones) */
+ if (use_enginecopy(fb) || use_blitter(fb) || igt_vc4_is_tiled(fb->modifier) ||
+ is_nouveau_device(fd)) {
setup_linear_mapping(&blit->base);
/* speed things up by working from a copy in system memory */
- cvt.src.slow_reads =
- is_i915_device(fd) && !gem_has_mappable_ggtt(fd);
+ cvt.src.slow_reads = is_i915_device(fd) && !gem_has_mappable_ggtt(fd);
} else {
blit->base.linear.fb = *fb;
blit->base.linear.fb.gem_handle = 0;
@@ -3659,7 +3730,8 @@ cairo_surface_t *igt_get_cairo_surface(int fd, struct igt_fb *fb)
create_cairo_surface__convert(fd, fb);
else if (use_blitter(fb) || use_enginecopy(fb) ||
igt_vc4_is_tiled(fb->modifier) ||
- igt_amd_is_tiled(fb->modifier))
+ igt_amd_is_tiled(fb->modifier) ||
+ is_nouveau_device(fb->fd))
create_cairo_surface__gpu(fd, fb);
else
create_cairo_surface__gtt(fd, fb);
@@ -3733,6 +3805,8 @@ void igt_remove_fb(int fd, struct igt_fb *fb)
do_or_die(drmModeRmFB(fd, fb->fb_id));
if (fb->is_dumb)
kmstest_dumb_destroy(fd, fb->gem_handle);
+ else if (is_nouveau_device(fd))
+ igt_nouveau_delete_bo(fb);
else
gem_close(fd, fb->gem_handle);
fb->fb_id = 0;