i915: Fix streaming loads for intel_tiled_memcpy
We stream from a tiled and aligned source into a user buffer that may be
unaligned, so the stores must use _mm_storeu_si128 instead of _mm_store_si128, which requires a 16-byte-aligned destination.
Fixes: d21c086d81 ("i965/tiled_memcpy: inline movntdqa loads in tiled_to_linear")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
18c50498db
commit
f5e8b13f78
|
@ -223,17 +223,17 @@ _memcpy_streaming_load(void *dest, const void *src, size_t count)
|
|||
{
|
||||
if (count == 16) {
|
||||
__m128i val = _mm_stream_load_si128((__m128i *)src);
|
||||
_mm_store_si128((__m128i *)dest, val);
|
||||
_mm_storeu_si128((__m128i *)dest, val);
|
||||
return dest;
|
||||
} else if (count == 64) {
|
||||
__m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0);
|
||||
__m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1);
|
||||
__m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2);
|
||||
__m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3);
|
||||
_mm_store_si128(((__m128i *)dest) + 0, val0);
|
||||
_mm_store_si128(((__m128i *)dest) + 1, val1);
|
||||
_mm_store_si128(((__m128i *)dest) + 2, val2);
|
||||
_mm_store_si128(((__m128i *)dest) + 3, val3);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 0, val0);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 1, val1);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 2, val2);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 3, val3);
|
||||
return dest;
|
||||
} else {
|
||||
assert(count < 64); /* and (count < 16) for ytiled */
|
||||
|
|
Loading…
Reference in New Issue