summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2018-02-27 21:45:14 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2018-03-01 08:46:14 +0000
commit: 6a06d014a38fc8282f7dda7c11e5cacd9daf44ca (patch)
tree: 3ed22f665ea29a563f9298618ea5d119e2d5e24d /tests
parent: 5aed726a723d0abd42e36a26dd6349739fefd568 (diff)
lib: Provide an accelerated routine for readback from WC
Reading from WC is awfully slow as each access is uncached and so performed synchronously, stalling for the memory load. For this purpose, x86 introduced some new instructions in SSE 4.1 that provide a small internal buffer to accelerate reading back a cacheline at a time from uncached memory.

v2: Don't be lazy and handle misalignment.
v3: Switch out of sse41 before emitting the generic memcpy routine.
v4: Replace open-coded memcpy_from_wc.
v5: Always flush the internal buffer before use (Eric).
v6: Assume bulk moves, so check for dst alignment.
v7: Use _mm_mfence instead of __builtin_ia32_mfence for consistency, remove superfluous defines (Ville).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Eric Anholt <eric@anholt.net>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Diffstat (limited to 'tests')
-rw-r--r-- tests/gem_fence_thrash.c 63
-rw-r--r-- tests/gem_mmap_gtt.c 37
-rw-r--r-- tests/gem_tiled_pread_pwrite.c 37
3 files changed, 4 insertions, 133 deletions
diff --git a/tests/gem_fence_thrash.c b/tests/gem_fence_thrash.c
index c8ff961d..2d7fb2ff 100644
--- a/tests/gem_fence_thrash.c
+++ b/tests/gem_fence_thrash.c
@@ -107,75 +107,16 @@ bo_copy (void *_arg)
return NULL;
}
-#if defined(__x86_64__) && !defined(__clang__)
-
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-
-#include <smmintrin.h>
-
-#define MOVNT 512
-
-__attribute__((noinline))
-static void copy_wc_page(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
-
- for (int i = 0; i < PAGE_SIZE/CACHELINE; i++) {
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- }
- } else
- memcpy(dst, src, PAGE_SIZE);
-}
-
-static void copy_wc_cacheline(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- } else
- memcpy(dst, src, CACHELINE);
-}
-
-#pragma GCC pop_options
-
-#else
-
static void copy_wc_page(void *dst, const void *src)
{
- memcpy(dst, src, PAGE_SIZE);
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE);
}
static void copy_wc_cacheline(void *dst, const void *src)
{
- memcpy(dst, src, CACHELINE);
+ igt_memcpy_from_wc(dst, src, CACHELINE);
}
-#endif
-
static void
_bo_write_verify(struct test *t)
{
diff --git a/tests/gem_mmap_gtt.c b/tests/gem_mmap_gtt.c
index 0f598125..6a332b25 100644
--- a/tests/gem_mmap_gtt.c
+++ b/tests/gem_mmap_gtt.c
@@ -529,45 +529,10 @@ test_huge_bo(int fd, int huge, int tiling)
munmap(linear_pattern, PAGE_SIZE);
}
-#if defined(__x86_64__) && !defined(__clang__)
-#define MOVNT 512
-
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-
-#include <smmintrin.h>
-__attribute__((noinline))
-static void copy_wc_page(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
-
- for (int i = 0; i < PAGE_SIZE/64; i++) {
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- }
- } else
- memcpy(dst, src, PAGE_SIZE);
-}
-
-#pragma GCC pop_options
-
-#else
static void copy_wc_page(void *dst, const void *src)
{
- memcpy(dst, src, PAGE_SIZE);
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE);
}
-#endif
static unsigned int tile_row_size(int tiling, unsigned int stride)
{
diff --git a/tests/gem_tiled_pread_pwrite.c b/tests/gem_tiled_pread_pwrite.c
index 7b5577fd..313daa38 100644
--- a/tests/gem_tiled_pread_pwrite.c
+++ b/tests/gem_tiled_pread_pwrite.c
@@ -100,45 +100,10 @@ create_bo(int fd)
return handle;
}
-#if defined(__x86_64__) && !defined(__clang__)
-#define MOVNT 512
-
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-
-#include <smmintrin.h>
-__attribute__((noinline))
-static void copy_wc_page(void *dst, void *src)
-{
- if (igt_x86_features() & SSE4_1) {
- __m128i *S = (__m128i *)src;
- __m128i *D = (__m128i *)dst;
-
- for (int i = 0; i < PAGE_SIZE/64; i++) {
- __m128i tmp[4];
-
- tmp[0] = _mm_stream_load_si128(S++);
- tmp[1] = _mm_stream_load_si128(S++);
- tmp[2] = _mm_stream_load_si128(S++);
- tmp[3] = _mm_stream_load_si128(S++);
-
- _mm_store_si128(D++, tmp[0]);
- _mm_store_si128(D++, tmp[1]);
- _mm_store_si128(D++, tmp[2]);
- _mm_store_si128(D++, tmp[3]);
- }
- } else
- memcpy(dst, src, PAGE_SIZE);
-}
-
-#pragma GCC pop_options
-
-#else
static void copy_wc_page(void *dst, const void *src)
{
- memcpy(dst, src, PAGE_SIZE);
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE);
}
-#endif
igt_simple_main
{