diff options
author | Maarten Lankhorst <maarten.lankhorst@linux.intel.com> | 2018-01-22 12:21:34 +0100 |
---|---|---|
committer | Maarten Lankhorst <maarten.lankhorst@linux.intel.com> | 2018-02-01 14:19:48 +0100 |
commit | 4452dcb4d8166cb3895ddc016267fd46c674b7f5 (patch) | |
tree | f1ad8e6cfbadf3dcd4a976a7d4884650466b763a /lib/igt_fb.c | |
parent | 918ea427d0255b5bc74da3dcccec8cc3fe65631f (diff) |
lib/igt_fb: Add support for NV12 format through conversion, v2.
For NV12 a format conversion is needed. Because YUV formats are not
fully defined with just a fourcc, I've chosen BT.601 limited range.
This puts the pixel center of the CbCr components between the top
left Y and bottom left Y:
Y Y Y Y
UV UV
Y Y Y Y
Some work is put into optimising the conversion routines in order to
make it fast enough. Before converting nv12 to rgb24, it is copied to
a temporary buffer to take advantage of memory caching. This is
approximately 20x faster than directly reading the BO.
When testing on my KBL with a 1080p buffer, it takes approximately
.1s to convert either way, this is fast enough not to bother optimising
even further for me.
Changes since v1:
- Use BT.601 instead of BT.709 coefficients.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Diffstat (limited to 'lib/igt_fb.c')
-rw-r--r-- | lib/igt_fb.c | 391 |
1 files changed, 319 insertions, 72 deletions
diff --git a/lib/igt_fb.c b/lib/igt_fb.c index 66da1ffa..6fecdd66 100644 --- a/lib/igt_fb.c +++ b/lib/igt_fb.c @@ -70,6 +70,7 @@ static struct format_desc_struct { DF(XRGB8888, RGB24, 32, 24), DF(XRGB2101010, RGB30, 32, 30), DF(ARGB8888, ARGB32, 32, 32), + DF(NV12, RGB24, 32, -1, 2, {8, 16}), }; #undef DF @@ -364,14 +365,20 @@ static int create_bo_for_fb(int fd, int width, int height, *is_dumb = false; if (is_i915_device(fd)) { - uint32_t *ptr; + uint8_t *ptr; bo = gem_create(fd, size); gem_set_tiling(fd, bo, igt_fb_mod_to_tiling(tiling), stride); /* Ensure the framebuffer is preallocated */ - ptr = gem_mmap__gtt(fd, bo, size, PROT_READ); - igt_assert(*ptr == 0); + ptr = gem_mmap__gtt(fd, bo, size, PROT_READ | PROT_WRITE); + igt_assert(*(uint32_t *)ptr == 0); + + if (format->drm_id == DRM_FORMAT_NV12) { + /* component formats have a different zero point */ + memset(ptr, 16, offsets[1]); + memset(ptr + offsets[1], 0x80, (height + 1)/2 * stride); + } gem_munmap(ptr, size); if (size_ret) @@ -1127,103 +1134,117 @@ static cairo_format_t drm_format_to_cairo(uint32_t drm_format) drm_format, igt_format_str(drm_format)); } +struct fb_blit_linear { + uint32_t handle; + unsigned size, stride; + uint8_t *map; + bool is_dumb; + uint32_t offsets[4]; +}; + struct fb_blit_upload { int fd; struct igt_fb *fb; - struct { - uint32_t handle; - unsigned size, stride; - uint8_t *map; - bool is_dumb; - } linear; + struct fb_blit_linear linear; }; -static void destroy_cairo_surface__blit(void *arg) +static void free_linear_mapping(int fd, struct igt_fb *fb, struct fb_blit_linear *linear) { - struct fb_blit_upload *blit = arg; - struct igt_fb *fb = blit->fb; unsigned int obj_tiling = igt_fb_mod_to_tiling(fb->tiling); + int i; - gem_munmap(blit->linear.map, blit->linear.size); - fb->cairo_surface = NULL; + gem_munmap(linear->map, linear->size); + gem_set_domain(fd, linear->handle, + I915_GEM_DOMAIN_GTT, 0); + + for (i = 0; i < fb->num_planes; i++) + igt_blitter_fast_copy__raw(fd, + linear->handle, + linear->offsets[i], + linear->stride, + I915_TILING_NONE, + 0, 0, /* src_x, src_y */ + fb->plane_width[i], fb->plane_height[i], + fb->plane_bpp[i], + fb->gem_handle, + fb->offsets[i], + fb->stride, + obj_tiling, + 0, 0 /* dst_x, dst_y */); + + gem_sync(fd, linear->handle); + gem_close(fd, linear->handle); +} - gem_set_domain(blit->fd, blit->linear.handle, - I915_GEM_DOMAIN_GTT, 0); +static void destroy_cairo_surface__blit(void *arg) +{ + struct fb_blit_upload *blit = arg; - igt_blitter_fast_copy__raw(blit->fd, - blit->linear.handle, 0, - blit->linear.stride, - I915_TILING_NONE, - 0, 0, /* src_x, src_y */ - fb->width, fb->height, - igt_drm_format_to_bpp(fb->drm_format), - fb->gem_handle, - fb->offsets[0], - fb->stride, - obj_tiling, - 0, 0 /* dst_x, dst_y */); - - gem_sync(blit->fd, blit->linear.handle); - gem_close(blit->fd, blit->linear.handle); + blit->fb->cairo_surface = NULL; + + free_linear_mapping(blit->fd, blit->fb, &blit->linear); free(blit); } -static void create_cairo_surface__blit(int fd, struct igt_fb *fb) +static void setup_linear_mapping(int fd, struct igt_fb *fb, struct fb_blit_linear *linear) { - struct fb_blit_upload *blit; - cairo_format_t cairo_format; unsigned int obj_tiling = igt_fb_mod_to_tiling(fb->tiling); - uint32_t offsets[4]; - - blit = malloc(sizeof(*blit)); - igt_assert(blit); + int i; /* * We create a linear BO that we'll map for the CPU to write to (using * cairo). This linear bo will be then blitted to its final * destination, tiling it at the same time. */ - blit->linear.handle = create_bo_for_fb(fd, fb->width, fb->height, + linear->handle = create_bo_for_fb(fd, fb->width, fb->height, lookup_drm_format(fb->drm_format), LOCAL_DRM_FORMAT_MOD_NONE, 0, - 0, &blit->linear.size, - &blit->linear.stride, - offsets, &blit->linear.is_dumb); - - igt_assert(blit->linear.handle > 0); + 0, &linear->size, + &linear->stride, + linear->offsets, &linear->is_dumb); - blit->fd = fd; - blit->fb = fb; + igt_assert(linear->handle > 0); /* Copy fb content to linear BO */ - gem_set_domain(fd, blit->linear.handle, + gem_set_domain(fd, linear->handle, I915_GEM_DOMAIN_GTT, 0); - igt_blitter_fast_copy__raw(fd, - fb->gem_handle, - fb->offsets[0], - fb->stride, - obj_tiling, - 0, 0, /* src_x, src_y */ - fb->width, fb->height, - igt_drm_format_to_bpp(fb->drm_format), - blit->linear.handle, 0, - blit->linear.stride, - I915_TILING_NONE, - 0, 0 /* dst_x, dst_y */); - - gem_sync(fd, blit->linear.handle); - - gem_set_domain(fd, blit->linear.handle, + for (i = 0; i < fb->num_planes; i++) + igt_blitter_fast_copy__raw(fd, + fb->gem_handle, + fb->offsets[i], + fb->stride, + obj_tiling, + 0, 0, /* src_x, src_y */ + fb->plane_width[i], fb->plane_height[i], + fb->plane_bpp[i], + linear->handle, linear->offsets[i], + linear->stride, + I915_TILING_NONE, + 0, 0 /* dst_x, dst_y */); + + gem_sync(fd, linear->handle); + + gem_set_domain(fd, linear->handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); /* Setup cairo context */ - blit->linear.map = gem_mmap__cpu(fd, - blit->linear.handle, - 0, - blit->linear.size, - PROT_READ | PROT_WRITE); + linear->map = gem_mmap__cpu(fd, linear->handle, + 0, linear->size, PROT_READ | PROT_WRITE); +} + +static void create_cairo_surface__blit(int fd, struct igt_fb *fb) +{ + struct fb_blit_upload *blit; + cairo_format_t cairo_format; + + blit = malloc(sizeof(*blit)); + igt_assert(blit); + + blit->fd = fd; + blit->fb = fb; + setup_linear_mapping(fd, fb, &blit->linear); cairo_format = drm_format_to_cairo(fb->drm_format); fb->cairo_surface = @@ -1285,6 +1306,233 @@ static void create_cairo_surface__gtt(int fd, struct igt_fb *fb) fb, destroy_cairo_surface__gtt); } +struct fb_convert_blit_upload { + int fd; + struct igt_fb *fb; + + struct { + uint8_t *map; + unsigned stride, size; + } rgb24; + + struct fb_blit_linear linear; +}; + +static uint8_t clamprgb(float val) { + if (val < 0) + return 0; + if (val > 255) + return 255; + + return (uint8_t)val; +} + +static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_upload *blit) +{ + int i, j; + const uint8_t *y, *uv; + uint8_t *rgb24 = blit->rgb24.map; + unsigned rgb24_stride = blit->rgb24.stride, planar_stride = blit->linear.stride; + uint8_t *buf = malloc(blit->linear.size); + + /* + * Reading from the BO is awfully slow because of lack of read caching, + * it's faster to copy the whole BO to a temporary buffer and convert + * from there. + */ + memcpy(buf, blit->linear.map, blit->linear.size); + y = &buf[blit->linear.offsets[0]]; + uv = &buf[blit->linear.offsets[1]]; + + /* Convert from limited color range BT.601 */ + for (i = 0; i < fb->height / 2; i++) { + for (j = 0; j < fb->width; j++) { + float r_, g_, b_, y0, y1, cb, cr; + /* Convert 1x2 pixel blocks */ + + y0 = 1.164f * (y[j] - 16.f); + y1 = 1.164f * (y[j + planar_stride] - 16.f); + + cb = uv[j & ~1] - 128.f; + cr = uv[j | 1] - 128.f; + + r_ = 0.000f * cb + 1.596f * cr; + g_ = -0.392f * cb + -0.813f * cr; + b_ = 2.017f * cb + 0.000f * cr; + + rgb24[j * 4 + 2] = clamprgb(y0 + r_); + rgb24[j * 4 + 2 + rgb24_stride] = clamprgb(y1 + r_); + + rgb24[j * 4 + 1] = clamprgb(y0 + g_); + rgb24[j * 4 + 1 + rgb24_stride] = clamprgb(y1 + g_); + + rgb24[j * 4] = clamprgb(y0 + b_); + rgb24[j * 4 + rgb24_stride] = clamprgb(y1 + b_); + } + + rgb24 += 2 * rgb24_stride; + y += 2 * planar_stride; + uv += planar_stride; + } + + if (fb->height & 1) { + /* Convert last row */ + for (j = 0; j < fb->width; j++) { + float r_, g_, b_, y0, cb, cr; + /* Convert single pixel */ + + cb = uv[j & ~1] - 128.f; + cr = uv[j | 1] - 128.f; + + y0 = 1.164f * (y[j] - 16.f); + r_ = 0.000f * cb + 1.596f * cr; + g_ = -0.392f * cb + -0.813f * cr; + b_ = 2.017f * cb + 0.000f * cr; + + rgb24[j * 4 + 2] = clamprgb(y0 + r_); + rgb24[j * 4 + 1] = clamprgb(y0 + g_); + rgb24[j * 4] = clamprgb(y0 + b_); + } + } + + free(buf); +} + +static void convert_rgb24_to_nv12(struct igt_fb *fb, struct fb_convert_blit_upload *blit) +{ + int i, j; + uint8_t *y = &blit->linear.map[blit->linear.offsets[0]]; + uint8_t *uv = &blit->linear.map[blit->linear.offsets[1]]; + const uint8_t *rgb24 = blit->rgb24.map; + unsigned rgb24_stride = blit->rgb24.stride; + unsigned planar_stride = blit->linear.stride; + + igt_assert_f(fb->drm_format == DRM_FORMAT_NV12, + "Conversion not implemented for !NV12 planar formats\n"); + + for (i = 0; i < fb->plane_height[0]; i++) { + /* Use limited color range BT.601 */ + + for (j = 0; j < fb->plane_width[0]; j++) { + float yf = 0.257f * rgb24[j * 4 + 2] + + 0.504f * rgb24[j * 4 + 1] + + 0.098f * rgb24[j * 4] + 16; + + y[j] = (uint8_t)yf; + } + + rgb24 += rgb24_stride; + y += planar_stride; + } + + rgb24 = blit->rgb24.map; + + for (i = 0; i < fb->height / 2; i++) { + for (j = 0; j < fb->plane_width[1]; j++) { + /* + * Pixel center for Cb'Cr' is between the left top and + * bottom pixel in a 2x2 block, so take the average. + */ + float uf = -0.148f/2 * rgb24[j * 8 + 2] + + -0.148f/2 * rgb24[j * 8 + 2 + rgb24_stride] + + -0.291f/2 * rgb24[j * 8 + 1] + + -0.291f/2 * rgb24[j * 8 + 1 + rgb24_stride] + + 0.439f/2 * rgb24[j * 8] + + 0.439f/2 * rgb24[j * 8 + rgb24_stride] + 128; + float vf = 0.439f/2 * rgb24[j * 8 + 2] + + 0.439f/2 * rgb24[j * 8 + 2 + rgb24_stride] + + -0.368f/2 * rgb24[j * 8 + 1] + + -0.368f/2 * rgb24[j * 8 + 1 + rgb24_stride] + + -0.071f/2 * rgb24[j * 8] + + -0.071f/2 * rgb24[j * 8 + rgb24_stride] + 128; + uv[j * 2] = (uint8_t)uf; + uv[j * 2 + 1] = (uint8_t)vf; + } + + rgb24 += 2 * rgb24_stride; + uv += planar_stride; + } + + /* Last row cannot be interpolated between 2 pixels, take the single value */ + if (i < fb->plane_height[1]) { + for (j = 0; j < fb->plane_width[1]; j++) { + float uf = -0.148f * rgb24[j * 8 + 2] + + -0.291f * rgb24[j * 8 + 1] + + 0.439f * rgb24[j * 8] + 128; + float vf = 0.439f * rgb24[j * 8 + 2] + + -0.368f * rgb24[j * 8 + 1] + + -0.071f * rgb24[j * 8] + 128; + + uv[j * 2] = (uint8_t)uf; + uv[j * 2 + 1] = (uint8_t)vf; + } + } +} + +static void destroy_cairo_surface__convert(void *arg) +{ + struct fb_convert_blit_upload *blit = arg; + struct igt_fb *fb = blit->fb; + + /* Convert back to planar! */ + igt_assert_f(fb->drm_format == DRM_FORMAT_NV12, + "Conversion not implemented for !NV12 planar formats\n"); + + convert_rgb24_to_nv12(fb, blit); + + munmap(blit->rgb24.map, blit->rgb24.size); + + if (blit->linear.handle) + free_linear_mapping(blit->fd, blit->fb, &blit->linear); + else + gem_munmap(blit->linear.map, fb->size); + + free(blit); + + fb->cairo_surface = NULL; +} + +static void create_cairo_surface__convert(int fd, struct igt_fb *fb) +{ + struct fb_convert_blit_upload *blit = malloc(sizeof(*blit)); + igt_assert(blit); + + blit->fd = fd; + blit->fb = fb; + blit->rgb24.stride = ALIGN(fb->width * 4, 16); + blit->rgb24.size = ALIGN(blit->rgb24.stride * fb->height, sysconf(_SC_PAGESIZE)); + blit->rgb24.map = mmap(NULL, blit->rgb24.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + igt_assert(blit->rgb24.map != MAP_FAILED); + + if (fb->tiling == LOCAL_I915_FORMAT_MOD_Y_TILED || + fb->tiling == LOCAL_I915_FORMAT_MOD_Yf_TILED) { + setup_linear_mapping(fd, fb, &blit->linear); + } else { + blit->linear.handle = 0; + blit->linear.map = gem_mmap__gtt(fd, fb->gem_handle, fb->size, + PROT_READ | PROT_WRITE); + igt_assert(blit->linear.map); + blit->linear.stride = fb->stride; + blit->linear.size = fb->size; + memcpy(blit->linear.offsets, fb->offsets, sizeof(fb->offsets)); + } + + /* Convert to linear! */ + igt_assert_f(fb->drm_format == DRM_FORMAT_NV12, + "Conversion not implemented for !NV12 planar formats\n"); + convert_nv12_to_rgb24(fb, blit); + + fb->cairo_surface = + cairo_image_surface_create_for_data(blit->rgb24.map, + CAIRO_FORMAT_RGB24, + fb->width, fb->height, + blit->rgb24.stride); + + cairo_surface_set_user_data(fb->cairo_surface, + (cairo_user_data_key_t *)create_cairo_surface__convert, + blit, destroy_cairo_surface__convert); +} + /** * igt_get_cairo_surface: * @fd: open drm file descriptor @@ -1298,11 +1546,10 @@ static void create_cairo_surface__gtt(int fd, struct igt_fb *fb) */ cairo_surface_t *igt_get_cairo_surface(int fd, struct igt_fb *fb) { - /* This doesn't work for planar formats for now, but we will convert them to RGB24 in the future. */ - igt_assert(fb->num_planes < 2); - if (fb->cairo_surface == NULL) { - if (fb->tiling == LOCAL_I915_FORMAT_MOD_Y_TILED || + if (fb->num_planes > 1) + create_cairo_surface__convert(fd, fb); + else if (fb->tiling == LOCAL_I915_FORMAT_MOD_Y_TILED || fb->tiling == LOCAL_I915_FORMAT_MOD_Yf_TILED) create_cairo_surface__blit(fd, fb); else |