radeonsi/gfx10: fix overflow and primitive queries

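This aligns the offsets used when reading back query results, both in
gfx10_sh_query_get_result_resource and in the query result shader, with
the memory layout of the query buffer defined by
gfx10_sh_query_buffer_mem. It also calls si_launch_grid_internal instead
of launch_grid, to flush caches and wait for completion of shaders prior
to retrieving results.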

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7181>
This commit is contained in:
Indrajit Kumar Das 2020-10-16 10:09:02 +05:30 committed by Marge Bot
parent fd4016f978
commit 5d14562da8
4 changed files with 28 additions and 23 deletions

View File

@ -360,11 +360,11 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
if (index >= 0) { if (index >= 0) {
switch (query->b.type) { switch (query->b.type) {
case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_GENERATED:
consts.offset = sizeof(uint32_t) * query->stream; consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
consts.config = 0; consts.config = 0;
break; break;
case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_EMITTED:
consts.offset = sizeof(uint32_t) * (4 + query->stream); consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
consts.config = 0; consts.config = 0;
break; break;
case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_STATISTICS:
@ -372,7 +372,7 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
consts.config = 0; consts.config = 0;
break; break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
consts.offset = sizeof(uint32_t) * query->stream; consts.offset = 4 * sizeof(uint64_t) * query->stream;
consts.config = 2; consts.config = 2;
break; break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
@ -454,8 +454,9 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0); si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
} }
sctx->b.launch_grid(&sctx->b, &grid); void *saved_cs = sctx->cs_shader_state.program;
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; si_launch_grid_internal((struct si_context *)&sctx->b, &grid, saved_cs,
SI_CS_WAIT_FOR_IDLE | SI_CS_PARTIAL_FLUSH_DISABLE);
if (qbuf == query->last) if (qbuf == query->last)
break; break;

View File

@ -60,15 +60,13 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
} }
} }
#define SI_CS_IMAGE_OP (1 << 0) void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
#define SI_CS_WAIT_FOR_IDLE (1 << 1)
#define SI_CS_RENDER_COND_ENABLE (1 << 2)
static void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
void *restore_cs, unsigned flags) void *restore_cs, unsigned flags)
{ {
/* Wait for previous shaders to finish. */ /* Wait for previous shaders to finish. */
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH; sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
if (!(flags & SI_CS_PARTIAL_FLUSH_DISABLE))
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
/* Invalidate L0-L1 caches. */ /* Invalidate L0-L1 caches. */
/* sL0 is never invalidated, because src resources don't use it. */ /* sL0 is never invalidated, because src resources don't use it. */
sctx->flags |= SI_CONTEXT_INV_VCACHE; sctx->flags |= SI_CONTEXT_INV_VCACHE;

View File

@ -1339,8 +1339,15 @@ bool vi_dcc_clear_level(struct si_context *sctx, struct si_texture *tex, unsigne
void si_init_clear_functions(struct si_context *sctx); void si_init_clear_functions(struct si_context *sctx);
/* si_compute_blit.c */ /* si_compute_blit.c */
#define SI_CS_IMAGE_OP (1 << 0)
#define SI_CS_WAIT_FOR_IDLE (1 << 1)
#define SI_CS_RENDER_COND_ENABLE (1 << 2)
#define SI_CS_PARTIAL_FLUSH_DISABLE (1 << 3)
unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
enum si_cache_policy cache_policy); enum si_cache_policy cache_policy);
void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
void *restore_cs, unsigned flags);
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset,
uint64_t size, uint32_t *clear_value, uint32_t clear_value_size, uint64_t size, uint32_t *clear_value, uint32_t clear_value_size,
enum si_coherency coher, bool force_cpdma); enum si_coherency coher, bool force_cpdma);

View File

@ -816,7 +816,7 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
"DCL BUFFER[2]\n" "DCL BUFFER[2]\n"
"DCL CONST[0][0..0]\n" "DCL CONST[0][0..0]\n"
"DCL TEMP[0..5]\n" "DCL TEMP[0..5]\n"
"IMM[0] UINT32 {0, 7, 0, 4294967295}\n" "IMM[0] UINT32 {0, 7, 256, 4294967295}\n"
"IMM[1] UINT32 {1, 2, 4, 8}\n" "IMM[1] UINT32 {1, 2, 4, 8}\n"
"IMM[2] UINT32 {16, 32, 64, 128}\n" "IMM[2] UINT32 {16, 32, 64, 128}\n"
@ -855,13 +855,13 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
"UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n" "UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n"
/* /*
fence = buffer[0]@(base_offset + 32); fence = buffer[0]@(base_offset + sizeof(gfx10_sh_query_buffer_mem.stream));
if (!fence) { if (!fence) {
acc_missing = ~0u; acc_missing = ~0u;
break; break;
} }
*/ */
"UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy\n" "UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].wwww\n"
"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n" "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
"USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n" "USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
"UIF TEMP[5]\n" "UIF TEMP[5]\n"
@ -897,22 +897,21 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
/* /*
do { do {
generated = buffer[0]@stream_offset; generated = buffer[0]@(stream_offset + 2 * sizeof(uint64_t));
emitted = buffer[0]@(stream_offset + 16); emitted = buffer[0]@(stream_offset + 3 * sizeof(uint64_t));
if (generated != emitted) { if (generated != emitted) {
acc_result = 1; acc_result = 1;
result_remaining = 0; result_remaining = 0;
break; break;
} }
stream_offset += 4; stream_offset += sizeof(gfx10_sh_query_buffer_mem.stream[0]);
} while (--count); } while (--count);
*/ */
"BGNLOOP\n" "BGNLOOP\n"
"UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n" "UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n"
"LOAD TEMP[4].x, BUFFER[0], TEMP[2].xxxx\n" "LOAD TEMP[4].xyzw, BUFFER[0], TEMP[5].xxxx\n"
"LOAD TEMP[4].y, BUFFER[0], TEMP[5].xxxx\n" "USNE TEMP[5], TEMP[4].xyxy, TEMP[4].zwzw\n"
"USNE TEMP[5], TEMP[4].xxxx, TEMP[4].yyyy\n"
"UIF TEMP[5]\n" "UIF TEMP[5]\n"
"MOV TEMP[0].x, IMM[1].xxxx\n" "MOV TEMP[0].x, IMM[1].xxxx\n"
"MOV TEMP[1].y, IMM[0].xxxx\n" "MOV TEMP[1].y, IMM[0].xxxx\n"
@ -924,15 +923,15 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
"UIF TEMP[5]\n" "UIF TEMP[5]\n"
"BRK\n" "BRK\n"
"ENDIF\n" "ENDIF\n"
"UADD TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz\n" "UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy\n"
"ENDLOOP\n" "ENDLOOP\n"
"ENDIF\n" "ENDIF\n"
/* /*
base_offset += 64; base_offset += sizeof(gfx10_sh_query_buffer_mem);
} // end outer loop } // end outer loop
*/ */
"UADD TEMP[1].y, TEMP[1].yyyy, IMM[2].zzzz\n" "UADD TEMP[1].y, TEMP[1].yyyy, IMM[0].zzzz\n"
"ENDLOOP\n" "ENDLOOP\n"
/* /*