i965: Revert recent tiled memcpy changes.
This reverts commit 79fe00efb4.
This reverts commit f5e8b13f78.
This reverts commit d21c086d81.
They broke the Android build and I'd rather not leave it broken for the long holiday weekend.
This commit is contained in:
parent
79fe00efb4
commit
58fb613a51
|
@ -92,14 +92,8 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110
|
|||
|
||||
noinst_LTLIBRARIES = \
|
||||
libi965_dri.la \
|
||||
libintel_tiled_memcpy.la \
|
||||
$(I965_PERGEN_LIBS)
|
||||
|
||||
libintel_tiled_memcpy_la_SOURCES = \
|
||||
$(intel_tiled_memcpy_FILES)
|
||||
libintel_tiled_memcpy_la_CFLAGS = \
|
||||
$(AM_CFLAGS) $(SSE41_CFLAGS)
|
||||
|
||||
libi965_dri_la_SOURCES = \
|
||||
$(i965_FILES) \
|
||||
$(i965_oa_GENERATED_FILES)
|
||||
|
@ -110,7 +104,6 @@ libi965_dri_la_LIBADD = \
|
|||
$(top_builddir)/src/intel/compiler/libintel_compiler.la \
|
||||
$(top_builddir)/src/intel/blorp/libblorp.la \
|
||||
$(I965_PERGEN_LIBS) \
|
||||
libintel_tiled_memcpy.la
|
||||
$(LIBDRM_LIBS)
|
||||
|
||||
BUILT_SOURCES = $(i965_oa_GENERATED_FILES)
|
||||
|
|
|
@ -110,13 +110,11 @@ i965_FILES = \
|
|||
intel_tex_image.c \
|
||||
intel_tex_obj.h \
|
||||
intel_tex_validate.c \
|
||||
intel_tiled_memcpy.c \
|
||||
intel_tiled_memcpy.h \
|
||||
intel_upload.c \
|
||||
libdrm_macros.h
|
||||
|
||||
intel_tiled_memcpy_FILES = \
|
||||
intel_tiled_memcpy.c \
|
||||
intel_tiled_memcpy.h
|
||||
|
||||
i965_gen4_FILES = \
|
||||
genX_blorp_exec.c \
|
||||
genX_state_upload.c
|
||||
|
|
|
@ -31,7 +31,6 @@
|
|||
#include "intel_image.h"
|
||||
#include "intel_mipmap_tree.h"
|
||||
#include "intel_tex.h"
|
||||
#include "intel_tiled_memcpy.h"
|
||||
#include "intel_blit.h"
|
||||
#include "intel_fbo.h"
|
||||
|
||||
|
@ -3024,7 +3023,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
|
|||
}
|
||||
|
||||
static void
|
||||
intel_miptree_unmap_map(struct brw_context *brw,
|
||||
intel_miptree_unmap_gtt(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice)
|
||||
|
@ -3033,7 +3032,7 @@ intel_miptree_unmap_map(struct brw_context *brw,
|
|||
}
|
||||
|
||||
static void
|
||||
intel_miptree_map_map(struct brw_context *brw,
|
||||
intel_miptree_map_gtt(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice)
|
||||
|
@ -3081,7 +3080,7 @@ intel_miptree_map_map(struct brw_context *brw,
|
|||
mt, _mesa_get_format_name(mt->format),
|
||||
x, y, map->ptr, map->stride);
|
||||
|
||||
map->unmap = intel_miptree_unmap_map;
|
||||
map->unmap = intel_miptree_unmap_gtt;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3113,94 +3112,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,
|
|||
intel_miptree_release(&map->linear_mt);
|
||||
}
|
||||
|
||||
/* Compute extent parameters for use with tiled_memcpy functions.
|
||||
* xs are in units of bytes and ys are in units of strides.
|
||||
*/
|
||||
static inline void
|
||||
tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice, unsigned int *x1_B,
|
||||
unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
|
||||
{
|
||||
unsigned int block_width, block_height;
|
||||
unsigned int x0_el, y0_el;
|
||||
|
||||
_mesa_get_format_block_size(mt->format, &block_width, &block_height);
|
||||
|
||||
assert(map->x % block_width == 0);
|
||||
assert(map->y % block_height == 0);
|
||||
|
||||
intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
|
||||
*x1_B = (map->x / block_width + x0_el) * mt->cpp;
|
||||
*y1_el = map->y / block_height + y0_el;
|
||||
*x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
|
||||
*y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level,
|
||||
unsigned int slice)
|
||||
{
|
||||
if (map->mode & GL_MAP_WRITE_BIT) {
|
||||
unsigned int x1, x2, y1, y2;
|
||||
tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
|
||||
|
||||
char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
|
||||
dst += mt->offset;
|
||||
|
||||
linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
|
||||
map->stride, brw->has_swizzling, mt->surf.tiling, memcpy);
|
||||
|
||||
intel_miptree_unmap_raw(mt);
|
||||
}
|
||||
_mesa_align_free(map->buffer);
|
||||
map->buffer = map->ptr = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_map_tiled_memcpy(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice)
|
||||
{
|
||||
intel_miptree_access_raw(brw, mt, level, slice,
|
||||
map->mode & GL_MAP_WRITE_BIT);
|
||||
|
||||
unsigned int x1, x2, y1, y2;
|
||||
tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
|
||||
map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);
|
||||
|
||||
/* The tiling and detiling functions require that the linear buffer
|
||||
* has proper 16-byte alignment (that is, its `x0` is 16-byte
|
||||
* aligned). Here we over-allocate the linear buffer by enough
|
||||
* bytes to get the proper alignment.
|
||||
*/
|
||||
map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16);
|
||||
map->ptr = (char *)map->buffer + (x1 & 0xf);
|
||||
assert(map->buffer);
|
||||
|
||||
if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
|
||||
char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
|
||||
src += mt->offset;
|
||||
|
||||
const mem_copy_fn fn =
|
||||
#if defined(USE_SSE41)
|
||||
cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy :
|
||||
#endif
|
||||
memcpy;
|
||||
|
||||
tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
|
||||
mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
|
||||
fn);
|
||||
|
||||
intel_miptree_unmap_raw(mt);
|
||||
}
|
||||
|
||||
map->unmap = intel_miptree_unmap_tiled_memcpy;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_map_blit(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
|
@ -3732,7 +3643,6 @@ intel_miptree_map(struct brw_context *brw,
|
|||
void **out_ptr,
|
||||
ptrdiff_t *out_stride)
|
||||
{
|
||||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||
struct intel_miptree_map *map;
|
||||
|
||||
assert(mt->surf.samples == 1);
|
||||
|
@ -3753,8 +3663,6 @@ intel_miptree_map(struct brw_context *brw,
|
|||
intel_miptree_map_depthstencil(brw, mt, map, level, slice);
|
||||
} else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
|
||||
intel_miptree_map_blit(brw, mt, map, level, slice);
|
||||
} else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) {
|
||||
intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
|
||||
#if defined(USE_SSE41)
|
||||
} else if (!(mode & GL_MAP_WRITE_BIT) &&
|
||||
!mt->compressed && cpu_has_sse4_1 &&
|
||||
|
@ -3762,9 +3670,7 @@ intel_miptree_map(struct brw_context *brw,
|
|||
intel_miptree_map_movntdqa(brw, mt, map, level, slice);
|
||||
#endif
|
||||
} else {
|
||||
if (mt->surf.tiling != ISL_TILING_LINEAR)
|
||||
perf_debug("intel_miptree_map: mapping via gtt");
|
||||
intel_miptree_map_map(brw, mt, map, level, slice);
|
||||
intel_miptree_map_gtt(brw, mt, map, level, slice);
|
||||
}
|
||||
|
||||
*out_ptr = map->ptr;
|
||||
|
|
|
@ -36,10 +36,6 @@
|
|||
#include "brw_context.h"
|
||||
#include "intel_tiled_memcpy.h"
|
||||
|
||||
#if defined(USE_SSE41)
|
||||
#include "main/streaming-load-memcpy.h"
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSSE3__)
|
||||
#include <tmmintrin.h>
|
||||
#elif defined(__SSE2__)
|
||||
|
@ -217,31 +213,6 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
|
|||
return dst;
|
||||
}
|
||||
|
||||
#if defined(USE_SSE41)
|
||||
static ALWAYS_INLINE void *
|
||||
_memcpy_streaming_load(void *dest, const void *src, size_t count)
|
||||
{
|
||||
if (count == 16) {
|
||||
__m128i val = _mm_stream_load_si128((__m128i *)src);
|
||||
_mm_storeu_si128((__m128i *)dest, val);
|
||||
return dest;
|
||||
} else if (count == 64) {
|
||||
__m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0);
|
||||
__m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1);
|
||||
__m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2);
|
||||
__m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 0, val0);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 1, val1);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 2, val2);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 3, val3);
|
||||
return dest;
|
||||
} else {
|
||||
assert(count < 64); /* and (count < 16) for ytiled */
|
||||
return memcpy(dest, src, count);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
|
||||
* These ranges are in bytes, i.e. pixels * bytes-per-pixel.
|
||||
|
@ -706,12 +677,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
|
@ -722,12 +687,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
@ -760,12 +719,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
|
@ -776,12 +729,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
@ -921,15 +868,6 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
|
|||
unreachable("unsupported tiling");
|
||||
}
|
||||
|
||||
#if defined(USE_SSE41)
|
||||
if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) {
|
||||
/* The hidden cacheline sized register used by movntdqa can apparently
|
||||
* give you stale data, so do an mfence to invalidate it.
|
||||
*/
|
||||
_mm_mfence();
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Round out to tile boundaries. */
|
||||
xt0 = ALIGN_DOWN(xt1, tw);
|
||||
xt3 = ALIGN_UP (xt2, tw);
|
||||
|
|
|
@ -129,13 +129,10 @@ files_i965 = files(
|
|||
'intel_tex_image.c',
|
||||
'intel_tex_obj.h',
|
||||
'intel_tex_validate.c',
|
||||
'intel_upload.c',
|
||||
'libdrm_macros.h',
|
||||
)
|
||||
|
||||
files_intel_tiled_memcpy = files(
|
||||
'intel_tiled_memcpy.c',
|
||||
'intel_tiled_memcpy.h',
|
||||
'intel_upload.c',
|
||||
'libdrm_macros.h',
|
||||
)
|
||||
|
||||
i965_gen_libs = []
|
||||
|
@ -179,15 +176,6 @@ i965_oa_sources = custom_target(
|
|||
],
|
||||
)
|
||||
|
||||
intel_tiled_memcpy = static_library(
|
||||
'intel_tiled_memcpy',
|
||||
[files_intel_tiled_memcpy],
|
||||
include_directories : [
|
||||
inc_common, inc_intel, inc_dri_common, inc_drm_uapi,
|
||||
],
|
||||
c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args],
|
||||
)
|
||||
|
||||
libi965 = static_library(
|
||||
'i965',
|
||||
[files_i965, i965_oa_sources, ir_expression_operation_h,
|
||||
|
@ -199,7 +187,7 @@ libi965 = static_library(
|
|||
cpp_args : [cpp_vis_args, '-msse2'],
|
||||
link_with : [
|
||||
i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
|
||||
libblorp, intel_tiled_memcpy,
|
||||
libblorp,
|
||||
],
|
||||
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue