diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2018-02-27 21:45:14 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2018-03-01 08:46:14 +0000 |
commit | 6a06d014a38fc8282f7dda7c11e5cacd9daf44ca (patch) | |
tree | 3ed22f665ea29a563f9298618ea5d119e2d5e24d /tests/gem_tiled_pread_pwrite.c | |
parent | 5aed726a723d0abd42e36a26dd6349739fefd568 (diff) |
lib: Provide an accelerated routine for readback from WC
Reading from WC (write-combining) memory is awfully slow, as each access
is uncached and so performed synchronously, stalling for the memory load.
x86 introduced new instructions in SSE 4.1 that use a small internal
buffer to accelerate reading back a cacheline at a time from uncached
memory, for exactly this purpose.
v2: Don't be lazy and handle misalignment.
v3: Switch out of sse41 before emitting the generic memcpy routine
v4: Replace opencoded memcpy_from_wc
v5: Always flush the internal buffer before use (Eric)
v6: Assume bulk moves, so check for dst alignment.
v7: Use _mm_mfence instead of __builtin_ia32_mfence for consistency, remove
superfluous defines (Ville)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Eric Anholt <eric@anholt.net>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Diffstat (limited to 'tests/gem_tiled_pread_pwrite.c')
-rw-r--r-- | tests/gem_tiled_pread_pwrite.c | 37 |
1 files changed, 1 insertions, 36 deletions
diff --git a/tests/gem_tiled_pread_pwrite.c b/tests/gem_tiled_pread_pwrite.c index 7b5577fd..313daa38 100644 --- a/tests/gem_tiled_pread_pwrite.c +++ b/tests/gem_tiled_pread_pwrite.c @@ -100,45 +100,10 @@ create_bo(int fd) return handle; } -#if defined(__x86_64__) && !defined(__clang__) -#define MOVNT 512 - -#pragma GCC push_options -#pragma GCC target("sse4.1") - -#include <smmintrin.h> -__attribute__((noinline)) -static void copy_wc_page(void *dst, void *src) -{ - if (igt_x86_features() & SSE4_1) { - __m128i *S = (__m128i *)src; - __m128i *D = (__m128i *)dst; - - for (int i = 0; i < PAGE_SIZE/64; i++) { - __m128i tmp[4]; - - tmp[0] = _mm_stream_load_si128(S++); - tmp[1] = _mm_stream_load_si128(S++); - tmp[2] = _mm_stream_load_si128(S++); - tmp[3] = _mm_stream_load_si128(S++); - - _mm_store_si128(D++, tmp[0]); - _mm_store_si128(D++, tmp[1]); - _mm_store_si128(D++, tmp[2]); - _mm_store_si128(D++, tmp[3]); - } - } else - memcpy(dst, src, PAGE_SIZE); -} - -#pragma GCC pop_options - -#else static void copy_wc_page(void *dst, const void *src) { - memcpy(dst, src, PAGE_SIZE); + igt_memcpy_from_wc(dst, src, PAGE_SIZE); } -#endif igt_simple_main { |