radeonsi/gfx10: fix overflow and primitive queries
This aligns the offsets to match the memory layout of the query buffer defined by gfx10_sh_query_buffer_mem and calls si_launch_grid_internal to flush caches and wait for completion of shaders prior to retrieving results.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7181>
This commit is contained in:
parent
fd4016f978
commit
5d14562da8
|
@ -360,11 +360,11 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
|
||||||
if (index >= 0) {
|
if (index >= 0) {
|
||||||
switch (query->b.type) {
|
switch (query->b.type) {
|
||||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||||
consts.offset = sizeof(uint32_t) * query->stream;
|
consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
|
||||||
consts.config = 0;
|
consts.config = 0;
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||||
consts.offset = sizeof(uint32_t) * (4 + query->stream);
|
consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
|
||||||
consts.config = 0;
|
consts.config = 0;
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_SO_STATISTICS:
|
case PIPE_QUERY_SO_STATISTICS:
|
||||||
|
@ -372,7 +372,7 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
|
||||||
consts.config = 0;
|
consts.config = 0;
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||||
consts.offset = sizeof(uint32_t) * query->stream;
|
consts.offset = 4 * sizeof(uint64_t) * query->stream;
|
||||||
consts.config = 2;
|
consts.config = 2;
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||||
|
@ -454,8 +454,9 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
|
||||||
si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
|
si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
sctx->b.launch_grid(&sctx->b, &grid);
|
void *saved_cs = sctx->cs_shader_state.program;
|
||||||
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
si_launch_grid_internal((struct si_context *)&sctx->b, &grid, saved_cs,
|
||||||
|
SI_CS_WAIT_FOR_IDLE | SI_CS_PARTIAL_FLUSH_DISABLE);
|
||||||
|
|
||||||
if (qbuf == query->last)
|
if (qbuf == query->last)
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -60,15 +60,13 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define SI_CS_IMAGE_OP (1 << 0)
|
void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
|
||||||
#define SI_CS_WAIT_FOR_IDLE (1 << 1)
|
|
||||||
#define SI_CS_RENDER_COND_ENABLE (1 << 2)
|
|
||||||
|
|
||||||
static void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
|
|
||||||
void *restore_cs, unsigned flags)
|
void *restore_cs, unsigned flags)
|
||||||
{
|
{
|
||||||
/* Wait for previous shaders to finish. */
|
/* Wait for previous shaders to finish. */
|
||||||
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
|
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
|
||||||
|
if (!(flags & SI_CS_PARTIAL_FLUSH_DISABLE))
|
||||||
|
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||||
/* Invalidate L0-L1 caches. */
|
/* Invalidate L0-L1 caches. */
|
||||||
/* sL0 is never invalidated, because src resources don't use it. */
|
/* sL0 is never invalidated, because src resources don't use it. */
|
||||||
sctx->flags |= SI_CONTEXT_INV_VCACHE;
|
sctx->flags |= SI_CONTEXT_INV_VCACHE;
|
||||||
|
|
|
@ -1339,8 +1339,15 @@ bool vi_dcc_clear_level(struct si_context *sctx, struct si_texture *tex, unsigne
|
||||||
void si_init_clear_functions(struct si_context *sctx);
|
void si_init_clear_functions(struct si_context *sctx);
|
||||||
|
|
||||||
/* si_compute_blit.c */
|
/* si_compute_blit.c */
|
||||||
|
#define SI_CS_IMAGE_OP (1 << 0)
|
||||||
|
#define SI_CS_WAIT_FOR_IDLE (1 << 1)
|
||||||
|
#define SI_CS_RENDER_COND_ENABLE (1 << 2)
|
||||||
|
#define SI_CS_PARTIAL_FLUSH_DISABLE (1 << 3)
|
||||||
|
|
||||||
unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
|
unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
|
||||||
enum si_cache_policy cache_policy);
|
enum si_cache_policy cache_policy);
|
||||||
|
void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
|
||||||
|
void *restore_cs, unsigned flags);
|
||||||
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset,
|
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset,
|
||||||
uint64_t size, uint32_t *clear_value, uint32_t clear_value_size,
|
uint64_t size, uint32_t *clear_value, uint32_t clear_value_size,
|
||||||
enum si_coherency coher, bool force_cpdma);
|
enum si_coherency coher, bool force_cpdma);
|
||||||
|
|
|
@ -816,7 +816,7 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
|
||||||
"DCL BUFFER[2]\n"
|
"DCL BUFFER[2]\n"
|
||||||
"DCL CONST[0][0..0]\n"
|
"DCL CONST[0][0..0]\n"
|
||||||
"DCL TEMP[0..5]\n"
|
"DCL TEMP[0..5]\n"
|
||||||
"IMM[0] UINT32 {0, 7, 0, 4294967295}\n"
|
"IMM[0] UINT32 {0, 7, 256, 4294967295}\n"
|
||||||
"IMM[1] UINT32 {1, 2, 4, 8}\n"
|
"IMM[1] UINT32 {1, 2, 4, 8}\n"
|
||||||
"IMM[2] UINT32 {16, 32, 64, 128}\n"
|
"IMM[2] UINT32 {16, 32, 64, 128}\n"
|
||||||
|
|
||||||
|
@ -855,13 +855,13 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
|
||||||
"UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n"
|
"UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
fence = buffer[0]@(base_offset + 32);
|
fence = buffer[0]@(base_offset + sizeof(gfx10_sh_query_buffer_mem.stream));
|
||||||
if (!fence) {
|
if (!fence) {
|
||||||
acc_missing = ~0u;
|
acc_missing = ~0u;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
"UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy\n"
|
"UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].wwww\n"
|
||||||
"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
|
"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
|
||||||
"USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
|
"USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
|
||||||
"UIF TEMP[5]\n"
|
"UIF TEMP[5]\n"
|
||||||
|
@ -897,22 +897,21 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
do {
|
do {
|
||||||
generated = buffer[0]@stream_offset;
|
generated = buffer[0]@(stream_offset + 2 * sizeof(uint64_t));
|
||||||
emitted = buffer[0]@(stream_offset + 16);
|
emitted = buffer[0]@(stream_offset + 3 * sizeof(uint64_t));
|
||||||
if (generated != emitted) {
|
if (generated != emitted) {
|
||||||
acc_result = 1;
|
acc_result = 1;
|
||||||
result_remaining = 0;
|
result_remaining = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
stream_offset += 4;
|
stream_offset += sizeof(gfx10_sh_query_buffer_mem.stream[0]);
|
||||||
} while (--count);
|
} while (--count);
|
||||||
*/
|
*/
|
||||||
"BGNLOOP\n"
|
"BGNLOOP\n"
|
||||||
"UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n"
|
"UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n"
|
||||||
"LOAD TEMP[4].x, BUFFER[0], TEMP[2].xxxx\n"
|
"LOAD TEMP[4].xyzw, BUFFER[0], TEMP[5].xxxx\n"
|
||||||
"LOAD TEMP[4].y, BUFFER[0], TEMP[5].xxxx\n"
|
"USNE TEMP[5], TEMP[4].xyxy, TEMP[4].zwzw\n"
|
||||||
"USNE TEMP[5], TEMP[4].xxxx, TEMP[4].yyyy\n"
|
|
||||||
"UIF TEMP[5]\n"
|
"UIF TEMP[5]\n"
|
||||||
"MOV TEMP[0].x, IMM[1].xxxx\n"
|
"MOV TEMP[0].x, IMM[1].xxxx\n"
|
||||||
"MOV TEMP[1].y, IMM[0].xxxx\n"
|
"MOV TEMP[1].y, IMM[0].xxxx\n"
|
||||||
|
@ -924,15 +923,15 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
|
||||||
"UIF TEMP[5]\n"
|
"UIF TEMP[5]\n"
|
||||||
"BRK\n"
|
"BRK\n"
|
||||||
"ENDIF\n"
|
"ENDIF\n"
|
||||||
"UADD TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz\n"
|
"UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy\n"
|
||||||
"ENDLOOP\n"
|
"ENDLOOP\n"
|
||||||
"ENDIF\n"
|
"ENDIF\n"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
base_offset += 64;
|
base_offset += sizeof(gfx10_sh_query_buffer_mem);
|
||||||
} // end outer loop
|
} // end outer loop
|
||||||
*/
|
*/
|
||||||
"UADD TEMP[1].y, TEMP[1].yyyy, IMM[2].zzzz\n"
|
"UADD TEMP[1].y, TEMP[1].yyyy, IMM[0].zzzz\n"
|
||||||
"ENDLOOP\n"
|
"ENDLOOP\n"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue