diff options
-rw-r--r-- | lib/igt_fb.c | 3
-rw-r--r-- | lib/igt_x86.c | 116
-rw-r--r-- | lib/igt_x86.h | 2
-rw-r--r-- | tests/gem_fence_thrash.c | 63
-rw-r--r-- | tests/gem_mmap_gtt.c | 37
-rw-r--r-- | tests/gem_tiled_pread_pwrite.c | 37
6 files changed, 124 insertions, 134 deletions
/*
 * Review summary of the patch below (six files, per the diffstat).
 *
 * lib/igt_x86.c gains igt_memcpy_from_wc(dst, src, len), declared in
 * lib/igt_x86.h. When built with defined(__x86_64__) && !defined(__clang__)
 * the symbol is an ifunc whose resolver, resolve_memcpy_from_wc(), returns
 * memcpy_from_wc_sse41() if igt_x86_features() has the SSE4_1 bit set and the
 * plain memcpy() wrapper memcpy_from_wc() otherwise. In every other build
 * igt_memcpy_from_wc() is itself a plain memcpy() wrapper.
 *
 * memcpy_from_wc_sse41() proceeds in this order:
 *   1. _mm_mfence().
 *   2. If src is not 16-byte aligned: stream-load the aligned 16-byte chunk
 *      that contains src into a local buffer and copy
 *      min(len, 16 - misalign) bytes out of it, then advance dst and src.
 *   3. While len >= 64: four _mm_stream_load_si128() loads per iteration,
 *      stored with _mm_store_si128() when dst is 16-byte aligned and with
 *      _mm_storeu_si128() when it is not.
 *   4. While len >= 16: one streaming load and one unaligned store.
 *   5. If bytes remain: stream-load a full 16 bytes into the local buffer and
 *      memcpy() only len of them to dst.
 *
 * lib/igt_fb.c switches the whole-BO copy in convert_nv12_to_rgb24() from
 * memcpy() to igt_memcpy_from_wc(). tests/gem_fence_thrash.c,
 * tests/gem_mmap_gtt.c and tests/gem_tiled_pread_pwrite.c drop their private
 * SSE4.1 copy_wc_page() / copy_wc_cacheline() bodies and the MOVNT define;
 * the remaining wrappers call igt_memcpy_from_wc() with PAGE_SIZE or
 * CACHELINE.
 *
 * NOTE(review): steps 2 and 5 load a full 16-byte chunk, so bytes outside
 * [src, src + len) are read (they are never stored to dst). Presumably
 * acceptable because each load stays inside one aligned 16-byte chunk;
 * confirm for every caller's mapping.
 * NOTE(review): "-Wpointer-arith" is silenced with "#pragma GCC diagnostic
 * ignored" (the function does arithmetic on void pointers), but no
 * "#pragma GCC diagnostic push" / "pop" pair is visible; only
 * push_options / pop_options bracket the function. Confirm whether the
 * suppression is meant to extend to the rest of lib/igt_x86.c.
 */
diff --git a/lib/igt_fb.c b/lib/igt_fb.c index ecd73053..7404ba7c 100644 --- a/lib/igt_fb.c +++ b/lib/igt_fb.c @@ -32,6 +32,7 @@ #include "drmtest.h" #include "igt_fb.h" #include "igt_kms.h" +#include "igt_x86.h" #include "ioctl_wrappers.h" #include "intel_chipset.h" @@ -1340,7 +1341,7 @@ static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_uplo * it's faster to copy the whole BO to a temporary buffer and convert * from there. */ - memcpy(buf, blit->linear.map, blit->linear.size); + igt_memcpy_from_wc(buf, blit->linear.map, blit->linear.size); y = &buf[blit->linear.offsets[0]]; uv = &buf[blit->linear.offsets[1]]; diff --git a/lib/igt_x86.c b/lib/igt_x86.c index 0ed3c6f1..cb1e0a72 100644 --- a/lib/igt_x86.c +++ b/lib/igt_x86.c @@ -36,7 +36,11 @@ #endif #include "igt_x86.h" +#include "igt_aux.h" + +#include <stdint.h> #include <stdio.h> +#include <string.h> /** * SECTION:igt_x86 @@ -174,3 +178,115 @@ char *igt_x86_features_to_string(unsigned features, char *line) return ret; } #endif + +#if defined(__x86_64__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("sse4.1") +#pragma GCC diagnostic ignored "-Wpointer-arith" + +#include <smmintrin.h> +static void memcpy_from_wc_sse41(void *dst, const void *src, unsigned long len) +{ + char buf[16]; + + /* Flush the internal buffer of potential stale gfx data */ + _mm_mfence(); + + if ((uintptr_t)src & 15) { + __m128i *S = (__m128i *)((uintptr_t)src & ~15); + unsigned long misalign = (uintptr_t)src & 15; + unsigned long copy = min(len, 16 - misalign); + + _mm_storeu_si128((__m128i *)buf, + _mm_stream_load_si128(S)); + + memcpy(dst, buf + misalign, copy); + + dst += copy; + src += copy; + len -= copy; + } + + /* We assume we are doing bulk transfers, so prefer aligned moves */ + if (((uintptr_t)dst & 15) == 0) { + while (len >= 64) { + __m128i *S = (__m128i *)src; + __m128i *D = (__m128i *)dst; + __m128i tmp[4]; + + tmp[0] = _mm_stream_load_si128(S + 0); + tmp[1] = 
_mm_stream_load_si128(S + 1); + tmp[2] = _mm_stream_load_si128(S + 2); + tmp[3] = _mm_stream_load_si128(S + 3); + + _mm_store_si128(D + 0, tmp[0]); + _mm_store_si128(D + 1, tmp[1]); + _mm_store_si128(D + 2, tmp[2]); + _mm_store_si128(D + 3, tmp[3]); + + src += 64; + dst += 64; + len -= 64; + } + } else { + while (len >= 64) { + __m128i *S = (__m128i *)src; + __m128i *D = (__m128i *)dst; + __m128i tmp[4]; + + tmp[0] = _mm_stream_load_si128(S + 0); + tmp[1] = _mm_stream_load_si128(S + 1); + tmp[2] = _mm_stream_load_si128(S + 2); + tmp[3] = _mm_stream_load_si128(S + 3); + + _mm_storeu_si128(D + 0, tmp[0]); + _mm_storeu_si128(D + 1, tmp[1]); + _mm_storeu_si128(D + 2, tmp[2]); + _mm_storeu_si128(D + 3, tmp[3]); + + src += 64; + dst += 64; + len -= 64; + } + } + + while (len >= 16) { + _mm_storeu_si128((__m128i *)dst, + _mm_stream_load_si128((__m128i *)src)); + + src += 16; + dst += 16; + len -= 16; + } + + if (len) { + _mm_storeu_si128((__m128i *)buf, + _mm_stream_load_si128((__m128i *)src)); + memcpy(dst, buf, len); + } +} + +#pragma GCC pop_options + +static void memcpy_from_wc(void *dst, const void *src, unsigned long len) +{ + memcpy(dst, src, len); +} + +static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long) +{ + if (igt_x86_features() & SSE4_1) + return memcpy_from_wc_sse41; + + return memcpy_from_wc; +} + +void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len) + __attribute__((ifunc("resolve_memcpy_from_wc"))); + +#else +void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len) +{ + memcpy(dst, src, len); +} +#endif diff --git a/lib/igt_x86.h b/lib/igt_x86.h index 27b7f0fd..d4f8c343 100644 --- a/lib/igt_x86.h +++ b/lib/igt_x86.h @@ -55,4 +55,6 @@ static inline char *igt_x86_features_to_string(unsigned features, char *line) } #endif +void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len); + #endif /* IGT_X86_H */ diff --git a/tests/gem_fence_thrash.c b/tests/gem_fence_thrash.c index 
c8ff961d..2d7fb2ff 100644 --- a/tests/gem_fence_thrash.c +++ b/tests/gem_fence_thrash.c @@ -107,75 +107,16 @@ bo_copy (void *_arg) return NULL; } -#if defined(__x86_64__) && !defined(__clang__) - -#pragma GCC push_options -#pragma GCC target("sse4.1") - -#include <smmintrin.h> - -#define MOVNT 512 - -__attribute__((noinline)) -static void copy_wc_page(void *dst, void *src) -{ - if (igt_x86_features() & SSE4_1) { - __m128i *S = (__m128i *)src; - __m128i *D = (__m128i *)dst; - - for (int i = 0; i < PAGE_SIZE/CACHELINE; i++) { - __m128i tmp[4]; - - tmp[0] = _mm_stream_load_si128(S++); - tmp[1] = _mm_stream_load_si128(S++); - tmp[2] = _mm_stream_load_si128(S++); - tmp[3] = _mm_stream_load_si128(S++); - - _mm_store_si128(D++, tmp[0]); - _mm_store_si128(D++, tmp[1]); - _mm_store_si128(D++, tmp[2]); - _mm_store_si128(D++, tmp[3]); - } - } else - memcpy(dst, src, PAGE_SIZE); -} - -static void copy_wc_cacheline(void *dst, void *src) -{ - if (igt_x86_features() & SSE4_1) { - __m128i *S = (__m128i *)src; - __m128i *D = (__m128i *)dst; - __m128i tmp[4]; - - tmp[0] = _mm_stream_load_si128(S++); - tmp[1] = _mm_stream_load_si128(S++); - tmp[2] = _mm_stream_load_si128(S++); - tmp[3] = _mm_stream_load_si128(S++); - - _mm_store_si128(D++, tmp[0]); - _mm_store_si128(D++, tmp[1]); - _mm_store_si128(D++, tmp[2]); - _mm_store_si128(D++, tmp[3]); - } else - memcpy(dst, src, CACHELINE); -} - -#pragma GCC pop_options - -#else - static void copy_wc_page(void *dst, const void *src) { - memcpy(dst, src, PAGE_SIZE); + igt_memcpy_from_wc(dst, src, PAGE_SIZE); } static void copy_wc_cacheline(void *dst, const void *src) { - memcpy(dst, src, CACHELINE); + igt_memcpy_from_wc(dst, src, CACHELINE); } -#endif - static void _bo_write_verify(struct test *t) { diff --git a/tests/gem_mmap_gtt.c b/tests/gem_mmap_gtt.c index 0f598125..6a332b25 100644 --- a/tests/gem_mmap_gtt.c +++ b/tests/gem_mmap_gtt.c @@ -529,45 +529,10 @@ test_huge_bo(int fd, int huge, int tiling) munmap(linear_pattern, PAGE_SIZE); } 
-#if defined(__x86_64__) && !defined(__clang__) -#define MOVNT 512 - -#pragma GCC push_options -#pragma GCC target("sse4.1") - -#include <smmintrin.h> -__attribute__((noinline)) -static void copy_wc_page(void *dst, void *src) -{ - if (igt_x86_features() & SSE4_1) { - __m128i *S = (__m128i *)src; - __m128i *D = (__m128i *)dst; - - for (int i = 0; i < PAGE_SIZE/64; i++) { - __m128i tmp[4]; - - tmp[0] = _mm_stream_load_si128(S++); - tmp[1] = _mm_stream_load_si128(S++); - tmp[2] = _mm_stream_load_si128(S++); - tmp[3] = _mm_stream_load_si128(S++); - - _mm_store_si128(D++, tmp[0]); - _mm_store_si128(D++, tmp[1]); - _mm_store_si128(D++, tmp[2]); - _mm_store_si128(D++, tmp[3]); - } - } else - memcpy(dst, src, PAGE_SIZE); -} - -#pragma GCC pop_options - -#else static void copy_wc_page(void *dst, const void *src) { - memcpy(dst, src, PAGE_SIZE); + igt_memcpy_from_wc(dst, src, PAGE_SIZE); } -#endif static unsigned int tile_row_size(int tiling, unsigned int stride) { diff --git a/tests/gem_tiled_pread_pwrite.c b/tests/gem_tiled_pread_pwrite.c index 7b5577fd..313daa38 100644 --- a/tests/gem_tiled_pread_pwrite.c +++ b/tests/gem_tiled_pread_pwrite.c @@ -100,45 +100,10 @@ create_bo(int fd) return handle; } -#if defined(__x86_64__) && !defined(__clang__) -#define MOVNT 512 - -#pragma GCC push_options -#pragma GCC target("sse4.1") - -#include <smmintrin.h> -__attribute__((noinline)) -static void copy_wc_page(void *dst, void *src) -{ - if (igt_x86_features() & SSE4_1) { - __m128i *S = (__m128i *)src; - __m128i *D = (__m128i *)dst; - - for (int i = 0; i < PAGE_SIZE/64; i++) { - __m128i tmp[4]; - - tmp[0] = _mm_stream_load_si128(S++); - tmp[1] = _mm_stream_load_si128(S++); - tmp[2] = _mm_stream_load_si128(S++); - tmp[3] = _mm_stream_load_si128(S++); - - _mm_store_si128(D++, tmp[0]); - _mm_store_si128(D++, tmp[1]); - _mm_store_si128(D++, tmp[2]); - _mm_store_si128(D++, tmp[3]); - } - } else - memcpy(dst, src, PAGE_SIZE); -} - -#pragma GCC pop_options - -#else static void 
copy_wc_page(void *dst, const void *src) { - memcpy(dst, src, PAGE_SIZE); + igt_memcpy_from_wc(dst, src, PAGE_SIZE); } -#endif igt_simple_main { |