i965: Revert recent tiled memcpy changes.

This reverts commit 79fe00efb4.
This reverts commit f5e8b13f78.
This reverts commit d21c086d81.

They broke the Android build and I'd rather not leave it broken
for the long holiday weekend.
This commit is contained in:
Kenneth Graunke 2018-05-26 16:25:34 -07:00
parent 79fe00efb4
commit 58fb613a51
5 changed files with 9 additions and 186 deletions

View File

@ -92,14 +92,8 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110
noinst_LTLIBRARIES = \
libi965_dri.la \
libintel_tiled_memcpy.la \
$(I965_PERGEN_LIBS)
libintel_tiled_memcpy_la_SOURCES = \
$(intel_tiled_memcpy_FILES)
libintel_tiled_memcpy_la_CFLAGS = \
$(AM_CFLAGS) $(SSE41_CFLAGS)
libi965_dri_la_SOURCES = \
$(i965_FILES) \
$(i965_oa_GENERATED_FILES)
@ -110,7 +104,6 @@ libi965_dri_la_LIBADD = \
$(top_builddir)/src/intel/compiler/libintel_compiler.la \
$(top_builddir)/src/intel/blorp/libblorp.la \
$(I965_PERGEN_LIBS) \
libintel_tiled_memcpy.la
$(LIBDRM_LIBS)
BUILT_SOURCES = $(i965_oa_GENERATED_FILES)

View File

@ -110,13 +110,11 @@ i965_FILES = \
intel_tex_image.c \
intel_tex_obj.h \
intel_tex_validate.c \
intel_tiled_memcpy.c \
intel_tiled_memcpy.h \
intel_upload.c \
libdrm_macros.h
intel_tiled_memcpy_FILES = \
intel_tiled_memcpy.c \
intel_tiled_memcpy.h
i965_gen4_FILES = \
genX_blorp_exec.c \
genX_state_upload.c

View File

@ -31,7 +31,6 @@
#include "intel_image.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "intel_tiled_memcpy.h"
#include "intel_blit.h"
#include "intel_fbo.h"
@ -3024,7 +3023,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
}
static void
intel_miptree_unmap_map(struct brw_context *brw,
intel_miptree_unmap_gtt(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
@ -3033,7 +3032,7 @@ intel_miptree_unmap_map(struct brw_context *brw,
}
static void
intel_miptree_map_map(struct brw_context *brw,
intel_miptree_map_gtt(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
@ -3081,7 +3080,7 @@ intel_miptree_map_map(struct brw_context *brw,
mt, _mesa_get_format_name(mt->format),
x, y, map->ptr, map->stride);
map->unmap = intel_miptree_unmap_map;
map->unmap = intel_miptree_unmap_gtt;
}
static void
@ -3113,94 +3112,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,
intel_miptree_release(&map->linear_mt);
}
/* Work out the region covered by a map, in the form the tiled_memcpy
 * functions take: the x bounds are in bytes, the y bounds are in units of
 * strides (rows).
 *
 * The map's x/y/w/h are divided by the format's block dimensions and then
 * offset by the image offset of (level, slice) within the miptree.  The map
 * origin must be a multiple of the block size (asserted); the far edge is
 * rounded up to a whole block.
 *
 * \param x1_B, x2_B    out: start and end of the region, in bytes
 * \param y1_el, y2_el  out: start and end of the region, in rows
 */
static inline void
tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
             unsigned int level, unsigned int slice, unsigned int *x1_B,
             unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
{
   unsigned int bw, bh;
   unsigned int img_x_el, img_y_el;

   _mesa_get_format_block_size(mt->format, &bw, &bh);

   /* A map that starts in the middle of a block cannot be expressed. */
   assert(map->x % bw == 0);
   assert(map->y % bh == 0);

   intel_miptree_get_image_offset(mt, level, slice, &img_x_el, &img_y_el);

   /* Near corner of the region. */
   *x1_B = (img_x_el + map->x / bw) * mt->cpp;
   *y1_el = img_y_el + map->y / bh;

   /* Far corner of the region, rounded up to whole blocks. */
   *x2_B = (img_x_el + DIV_ROUND_UP(map->x + map->w, bw)) * mt->cpp;
   *y2_el = img_y_el + DIV_ROUND_UP(map->y + map->h, bh);
}
/* Unmap callback for maps that use a linear staging buffer
 * (map->buffer / map->ptr).
 *
 * If the map was opened with GL_MAP_WRITE_BIT, the staging buffer is copied
 * back into the miptree's storage with linear_to_tiled().  The staging
 * buffer is then freed and both pointers are cleared, whether or not a
 * write-back happened.
 */
static void
intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 struct intel_miptree_map *map,
                                 unsigned int level,
                                 unsigned int slice)
{
   if (map->mode & GL_MAP_WRITE_BIT) {
      unsigned int x_begin, x_end, y_begin, y_end;

      tile_extents(mt, map, level, slice,
                   &x_begin, &x_end, &y_begin, &y_end);

      /* Raw mapping of the BO, advanced to where the miptree starts. */
      char *tiled = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
      tiled += mt->offset;

      linear_to_tiled(x_begin, x_end, y_begin, y_end,
                      tiled, map->ptr,
                      mt->surf.row_pitch, map->stride,
                      brw->has_swizzling, mt->surf.tiling,
                      memcpy);

      intel_miptree_unmap_raw(mt);
   }

   /* map->ptr points into map->buffer, so only the latter is freed. */
   _mesa_align_free(map->buffer);
   map->ptr = NULL;
   map->buffer = NULL;
}
/* Map a tiled miptree region through a linear staging buffer.
 *
 * A 16-byte-aligned buffer is allocated for the region returned by
 * tile_extents().  Unless the caller passed GL_MAP_INVALIDATE_RANGE_BIT,
 * the current contents are copied into it with tiled_to_linear().  On
 * return map->ptr and map->stride describe the staging buffer, and
 * map->unmap is set to intel_miptree_unmap_tiled_memcpy.
 */
static void
intel_miptree_map_tiled_memcpy(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               struct intel_miptree_map *map,
                               unsigned int level, unsigned int slice)
{
   intel_miptree_access_raw(brw, mt, level, slice,
                            map->mode & GL_MAP_WRITE_BIT);

   unsigned int x1, x2, y1, y2;
   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
   map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);

   /* The tiling and detiling functions require that the linear buffer
    * has proper 16-byte alignment (that is, its `x0` is 16-byte
    * aligned).  Here we over-allocate the linear buffer by enough
    * bytes to get the proper alignment.
    */
   map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16);

   /* Check the allocation before deriving map->ptr from it, so that a
    * failed allocation is caught before any arithmetic on a NULL pointer.
    */
   assert(map->buffer);
   map->ptr = (char *)map->buffer + (x1 & 0xf);

   /* With GL_MAP_INVALIDATE_RANGE_BIT the read-back is skipped. */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
      src += mt->offset;

      /* Use the streaming-load copy when built with SSE4.1 support and
       * the CPU reports it; plain memcpy otherwise.
       */
      const mem_copy_fn fn =
#if defined(USE_SSE41)
         cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy :
#endif
         memcpy;

      tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
                      mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
                      fn);

      intel_miptree_unmap_raw(mt);
   }

   map->unmap = intel_miptree_unmap_tiled_memcpy;
}
static void
intel_miptree_map_blit(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@ -3732,7 +3643,6 @@ intel_miptree_map(struct brw_context *brw,
void **out_ptr,
ptrdiff_t *out_stride)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct intel_miptree_map *map;
assert(mt->surf.samples == 1);
@ -3753,8 +3663,6 @@ intel_miptree_map(struct brw_context *brw,
intel_miptree_map_depthstencil(brw, mt, map, level, slice);
} else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
intel_miptree_map_blit(brw, mt, map, level, slice);
} else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) {
intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
#if defined(USE_SSE41)
} else if (!(mode & GL_MAP_WRITE_BIT) &&
!mt->compressed && cpu_has_sse4_1 &&
@ -3762,9 +3670,7 @@ intel_miptree_map(struct brw_context *brw,
intel_miptree_map_movntdqa(brw, mt, map, level, slice);
#endif
} else {
if (mt->surf.tiling != ISL_TILING_LINEAR)
perf_debug("intel_miptree_map: mapping via gtt");
intel_miptree_map_map(brw, mt, map, level, slice);
intel_miptree_map_gtt(brw, mt, map, level, slice);
}
*out_ptr = map->ptr;

View File

@ -36,10 +36,6 @@
#include "brw_context.h"
#include "intel_tiled_memcpy.h"
#if defined(USE_SSE41)
#include "main/streaming-load-memcpy.h"
#include <smmintrin.h>
#endif
#if defined(__SSSE3__)
#include <tmmintrin.h>
#elif defined(__SSE2__)
@ -217,31 +213,6 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
return dst;
}
#if defined(USE_SSE41)
/* memcpy-shaped copy that reads the source with SSE4.1 streaming loads
 * (_mm_stream_load_si128) and writes with unaligned stores.
 *
 * Only the sizes 16 and 64 take the streaming path.  Any other size must be
 * smaller than 64 (asserted) and is handed to plain memcpy.  Returns dest,
 * like memcpy.
 */
static ALWAYS_INLINE void *
_memcpy_streaming_load(void *dest, const void *src, size_t count)
{
   switch (count) {
   case 16: {
      const __m128i v = _mm_stream_load_si128((__m128i *)src);
      _mm_storeu_si128((__m128i *)dest, v);
      return dest;
   }
   case 64: {
      __m128i *in = (__m128i *)src;
      __m128i *out = (__m128i *)dest;

      /* Issue all four loads before the first store. */
      const __m128i v0 = _mm_stream_load_si128(in + 0);
      const __m128i v1 = _mm_stream_load_si128(in + 1);
      const __m128i v2 = _mm_stream_load_si128(in + 2);
      const __m128i v3 = _mm_stream_load_si128(in + 3);

      _mm_storeu_si128(out + 0, v0);
      _mm_storeu_si128(out + 1, v1);
      _mm_storeu_si128(out + 2, v2);
      _mm_storeu_si128(out + 3, v3);
      return dest;
   }
   default:
      assert(count < 64); /* and (count < 16) for ytiled */
      return memcpy(dest, src, count);
   }
}
#endif
/**
* Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
* These ranges are in bytes, i.e. pixels * bytes-per-pixel.
@ -706,12 +677,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(USE_SSE41)
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
} else {
@ -722,12 +687,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(USE_SSE41)
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
}
@ -760,12 +719,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(USE_SSE41)
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
} else {
@ -776,12 +729,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
rgba8_copy, rgba8_copy_aligned_src);
#if defined(USE_SSE41)
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
memcpy, _memcpy_streaming_load);
#endif
else
unreachable("not reached");
}
@ -921,15 +868,6 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
unreachable("unsupported tiling");
}
#if defined(USE_SSE41)
if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) {
/* The hidden cacheline sized register used by movntdqa can apparently
* give you stale data, so do an mfence to invalidate it.
*/
_mm_mfence();
}
#endif
/* Round out to tile boundaries. */
xt0 = ALIGN_DOWN(xt1, tw);
xt3 = ALIGN_UP (xt2, tw);

View File

@ -129,13 +129,10 @@ files_i965 = files(
'intel_tex_image.c',
'intel_tex_obj.h',
'intel_tex_validate.c',
'intel_upload.c',
'libdrm_macros.h',
)
files_intel_tiled_memcpy = files(
'intel_tiled_memcpy.c',
'intel_tiled_memcpy.h',
'intel_upload.c',
'libdrm_macros.h',
)
i965_gen_libs = []
@ -179,15 +176,6 @@ i965_oa_sources = custom_target(
],
)
intel_tiled_memcpy = static_library(
'intel_tiled_memcpy',
[files_intel_tiled_memcpy],
include_directories : [
inc_common, inc_intel, inc_dri_common, inc_drm_uapi,
],
c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args],
)
libi965 = static_library(
'i965',
[files_i965, i965_oa_sources, ir_expression_operation_h,
@ -199,7 +187,7 @@ libi965 = static_library(
cpp_args : [cpp_vis_args, '-msse2'],
link_with : [
i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
libblorp, intel_tiled_memcpy,
libblorp,
],
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
)