summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/igt_fb.c 3
-rw-r--r-- lib/igt_x86.c 116
-rw-r--r-- lib/igt_x86.h 2
-rw-r--r-- tests/gem_fence_thrash.c 63
-rw-r--r-- tests/gem_mmap_gtt.c 37
-rw-r--r-- tests/gem_tiled_pread_pwrite.c 37
6 files changed, 124 insertions, 134 deletions
diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index ecd73053..7404ba7c 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -32,6 +32,7 @@
#include "drmtest.h"
#include "igt_fb.h"
#include "igt_kms.h"
+#include "igt_x86.h"
#include "ioctl_wrappers.h"
#include "intel_chipset.h"
@@ -1340,7 +1341,7 @@ static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_uplo
* it's faster to copy the whole BO to a temporary buffer and convert
* from there.
*/
- memcpy(buf, blit->linear.map, blit->linear.size);
+ igt_memcpy_from_wc(buf, blit->linear.map, blit->linear.size);
y = &buf[blit->linear.offsets[0]];
uv = &buf[blit->linear.offsets[1]];
diff --git a/lib/igt_x86.c b/lib/igt_x86.c
index 0ed3c6f1..cb1e0a72 100644
--- a/lib/igt_x86.c
+++ b/lib/igt_x86.c
@@ -36,7 +36,11 @@
#endif
#include "igt_x86.h"
+#include "igt_aux.h"
+
+#include <stdint.h>
#include <stdio.h>
+#include <string.h>
/**
* SECTION:igt_x86
@@ -174,3 +178,115 @@ char *igt_x86_features_to_string(unsigned features, char *line)
return ret;
}
#endif
+
+#if defined(__x86_64__) && !defined(__clang__)
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#pragma GCC diagnostic ignored "-Wpointer-arith"
+ /* NOTE(review): the suppression above covers the void-pointer arithmetic in the SSE4.1 copy routine, but it is not bracketed by "diagnostic push"/"diagnostic pop"; pop_options presumably restores only target/optimize options, so -Wpointer-arith may stay disabled for the rest of this file - confirm. */
+#include <smmintrin.h>
+static void memcpy_from_wc_sse41(void *dst, const void *src, unsigned long len) /* copy len bytes from src to dst, reading src with SSE4.1 streaming loads; src is presumably a write-combining mapping (per the name) */
+{
+ char buf[16]; /* bounce buffer for the partial 16-byte chunks at head and tail */
+
+ /* Flush the internal buffer of potential stale gfx data */
+ _mm_mfence();
+ /* Head: src is not 16-byte aligned. Load the aligned chunk that contains it and keep only the bytes from src onwards. */
+ if ((uintptr_t)src & 15) {
+ __m128i *S = (__m128i *)((uintptr_t)src & ~15); /* src rounded down to a 16-byte boundary */
+ unsigned long misalign = (uintptr_t)src & 15; /* offset of src inside that chunk */
+ unsigned long copy = min(len, 16 - misalign); /* bytes to take from the chunk, capped at len */
+
+ _mm_storeu_si128((__m128i *)buf,
+ _mm_stream_load_si128(S));
+
+ memcpy(dst, buf + misalign, copy);
+ /* Past this point either src is 16-byte aligned or len is 0. */
+ dst += copy;
+ src += copy;
+ len -= copy;
+ }
+
+ /* We assume we are doing bulk transfers, so prefer aligned moves */
+ if (((uintptr_t)dst & 15) == 0) { /* dst is 16-byte aligned as well: aligned stores */
+ while (len >= 64) {
+ __m128i *S = (__m128i *)src;
+ __m128i *D = (__m128i *)dst;
+ __m128i tmp[4];
+
+ tmp[0] = _mm_stream_load_si128(S + 0);
+ tmp[1] = _mm_stream_load_si128(S + 1);
+ tmp[2] = _mm_stream_load_si128(S + 2);
+ tmp[3] = _mm_stream_load_si128(S + 3);
+ /* All four loads (64 bytes) are issued before any store. */
+ _mm_store_si128(D + 0, tmp[0]);
+ _mm_store_si128(D + 1, tmp[1]);
+ _mm_store_si128(D + 2, tmp[2]);
+ _mm_store_si128(D + 3, tmp[3]);
+
+ src += 64;
+ dst += 64;
+ len -= 64;
+ }
+ } else { /* dst is not 16-byte aligned: same 64-byte loop, unaligned stores */
+ while (len >= 64) {
+ __m128i *S = (__m128i *)src;
+ __m128i *D = (__m128i *)dst;
+ __m128i tmp[4];
+
+ tmp[0] = _mm_stream_load_si128(S + 0);
+ tmp[1] = _mm_stream_load_si128(S + 1);
+ tmp[2] = _mm_stream_load_si128(S + 2);
+ tmp[3] = _mm_stream_load_si128(S + 3);
+
+ _mm_storeu_si128(D + 0, tmp[0]);
+ _mm_storeu_si128(D + 1, tmp[1]);
+ _mm_storeu_si128(D + 2, tmp[2]);
+ _mm_storeu_si128(D + 3, tmp[3]);
+
+ src += 64;
+ dst += 64;
+ len -= 64;
+ }
+ }
+ /* Remaining whole 16-byte chunks (fewer than four), one at a time, always with unaligned stores. */
+ while (len >= 16) {
+ _mm_storeu_si128((__m128i *)dst,
+ _mm_stream_load_si128((__m128i *)src));
+
+ src += 16;
+ dst += 16;
+ len -= 16;
+ }
+ /* Tail (1..15 bytes): a full 16-byte chunk is loaded into buf and only len bytes are copied out, so the load reads past src + len but stays inside the same aligned 16-byte chunk. */
+ if (len) {
+ _mm_storeu_si128((__m128i *)buf,
+ _mm_stream_load_si128((__m128i *)src));
+ memcpy(dst, buf, len);
+ }
+}
+
+#pragma GCC pop_options
+
+static void memcpy_from_wc(void *dst, const void *src, unsigned long len) /* copy routine the resolver returns when the SSE4_1 feature bit is not set */
+{
+ memcpy(dst, src, len); /* plain libc copy, no streaming loads */
+}
+
+static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long) /* ifunc resolver: returns the function that igt_memcpy_from_wc is bound to */
+{
+ if (igt_x86_features() & SSE4_1) /* SSE4_1 bit set in the reported feature mask */
+ return memcpy_from_wc_sse41;
+ /* Otherwise use the plain memcpy() wrapper. */
+ return memcpy_from_wc;
+}
+
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len) /* public entry point: copy len bytes from src to dst */
+ __attribute__((ifunc("resolve_memcpy_from_wc"))); /* GNU indirect function: the implementation is whatever resolve_memcpy_from_wc() returns */
+
+#else
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len) /* build without x86_64 GCC support (non-x86_64 target or clang): no SSE4.1 path */
+{
+ memcpy(dst, src, len); /* same signature as the ifunc variant, plain libc copy */
+}
+#endif
diff --git a/lib/igt_x86.h b/lib/igt_x86.h
index 27b7f0fd..d4f8c343 100644
--- a/lib/igt_x86.h
+++ b/lib/igt_x86.h
@@ -55,4 +55,6 @@ static inline char *igt_x86_features_to_string(unsigned features, char *line)
}
#endif
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len); /* copy len bytes from src to dst; defined in igt_x86.c, where it may use SSE4.1 streaming loads on src */
+
#endif /* IGT_X86_H */
diff --git a/tests/gem_fence_thrash.c b/tests/gem_fence_thrash.c
index c8ff961d..2d7fb2ff 100644
--- a/tests/gem_fence_thrash.c
+++ b/tests/gem_fence_thrash.c
@@ -107,75 +107,16 @@ bo_copy (void *_arg)
return NULL;
}
-#if defined(__x86_64__) && !defined(__clang__)
-
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-
-#include <smmintrin.h>
-
-#define MOVNT 512
-
-__attribute__((noinline))
-static void copy_wc_page(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
-
- for (int i = 0; i < PAGE_SIZE/CACHELINE; i++) {
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- }
- } else
- memcpy(dst, src, PAGE_SIZE);
-}
-
-static void copy_wc_cacheline(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- } else
- memcpy(dst, src, CACHELINE);
-}
-
-#pragma GCC pop_options
-
-#else
-
static void copy_wc_page(void *dst, const void *src)
{
- memcpy(dst, src, PAGE_SIZE);
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE); /* one page via the copy helper declared in igt_x86.h */
}
static void copy_wc_cacheline(void *dst, const void *src)
{
- memcpy(dst, src, CACHELINE);
+ igt_memcpy_from_wc(dst, src, CACHELINE); /* CACHELINE bytes via the copy helper declared in igt_x86.h */
}
-#endif
-
static void
_bo_write_verify(struct test *t)
{
diff --git a/tests/gem_mmap_gtt.c b/tests/gem_mmap_gtt.c
index 0f598125..6a332b25 100644
--- a/tests/gem_mmap_gtt.c
+++ b/tests/gem_mmap_gtt.c
@@ -529,45 +529,10 @@ test_huge_bo(int fd, int huge, int tiling)
munmap(linear_pattern, PAGE_SIZE);
}
-#if defined(__x86_64__) && !defined(__clang__)
-#define MOVNT 512
-
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-
-#include <smmintrin.h>
-__attribute__((noinline))
-static void copy_wc_page(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
-
- for (int i = 0; i < PAGE_SIZE/64; i++) {
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- }
- } else
- memcpy(dst, src, PAGE_SIZE);
-}
-
-#pragma GCC pop_options
-
-#else
static void copy_wc_page(void *dst, const void *src)
{
- memcpy(dst, src, PAGE_SIZE);
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE); /* one page via the copy helper declared in igt_x86.h */
}
-#endif
static unsigned int tile_row_size(int tiling, unsigned int stride)
{
diff --git a/tests/gem_tiled_pread_pwrite.c b/tests/gem_tiled_pread_pwrite.c
index 7b5577fd..313daa38 100644
--- a/tests/gem_tiled_pread_pwrite.c
+++ b/tests/gem_tiled_pread_pwrite.c
@@ -100,45 +100,10 @@ create_bo(int fd)
return handle;
}
-#if defined(__x86_64__) && !defined(__clang__)
-#define MOVNT 512
-
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-
-#include <smmintrin.h>
-__attribute__((noinline))
-static void copy_wc_page(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
-
- for (int i = 0; i < PAGE_SIZE/64; i++) {
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- }
- } else
- memcpy(dst, src, PAGE_SIZE);
-}
-
-#pragma GCC pop_options
-
-#else
static void copy_wc_page(void *dst, const void *src)
{
- memcpy(dst, src, PAGE_SIZE);
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE); /* one page via the copy helper declared in igt_x86.h */
}
-#endif
igt_simple_main
{