radeonsi: simplify how pipeline statistic offsets are computed

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
Marek Olšák 2022-06-02 17:14:40 -04:00 committed by Marge Bot
parent 57b7dcd9db
commit 928e5f240d
4 changed files with 41 additions and 45 deletions

View File

@ -1864,8 +1864,7 @@ void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
ctx->ac.i32_1,
ngg_get_emulated_counters_buf(ctx),
LLVMConstInt(ctx->ac.i32,
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
SI_QUERY_STATS_END_OFFSET_DW) * 4,
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
false),
ctx->ac.i32_0, /* soffset */
ctx->ac.i32_0, /* cachepolicy */
@ -2195,8 +2194,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx)
ctx->ac.i32_1,
ngg_get_emulated_counters_buf(ctx),
LLVMConstInt(ctx->ac.i32,
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_PRIMITIVES) +
SI_QUERY_STATS_END_OFFSET_DW) * 4,
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
false),
ctx->ac.i32_0, /* soffset */
ctx->ac.i32_0, /* cachepolicy */

View File

@ -637,27 +637,35 @@ static bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_
return true;
}
int si_hw_query_dw_offset(int index)
/* Number of pipeline-statistics results kept per query.
 * The screen argument is accepted but not consulted here; the count is
 * the fixed value 11.
 */
static unsigned si_query_pipestats_num_results(struct si_screen *sscreen)
{
   const unsigned num_results = 11;

   (void)sscreen;
   return num_results;
}
/* Map a pipeline-statistics counter index to the dword offset of its start
 * value in the query buffer. The offsets listed below advance in steps of
 * 2 dwords, from 0 up to 20.
 *
 * An index not covered by the switch trips assert(false); when asserts are
 * compiled out, control reaches the sentinel return after the switch.
 *
 * NOTE(review): this span reads like a diff whose +/- markers were lost:
 * the first case list ending in `return -1;` appears to be the removed
 * version, and the second, offset-sorted list ending in `return ~0;` the
 * added one. Confirm against the real tree before relying on either.
 */
static unsigned si_query_pipestat_dw_offset(enum pipe_statistics_query_index index)
{
/* Offset in dwords in the query buffer of the start value
* for the given counter.
*/
switch (index) {
case PIPE_STAT_QUERY_IA_VERTICES: return 14;
case PIPE_STAT_QUERY_IA_PRIMITIVES: return 12;
case PIPE_STAT_QUERY_VS_INVOCATIONS: return 6;
case PIPE_STAT_QUERY_GS_INVOCATIONS: return 8;
case PIPE_STAT_QUERY_GS_PRIMITIVES: return 10;
case PIPE_STAT_QUERY_C_INVOCATIONS: return 4;
case PIPE_STAT_QUERY_C_PRIMITIVES: return 2;
case PIPE_STAT_QUERY_PS_INVOCATIONS: return 0;
case PIPE_STAT_QUERY_HS_INVOCATIONS: return 16;
case PIPE_STAT_QUERY_DS_INVOCATIONS: return 18;
case PIPE_STAT_QUERY_CS_INVOCATIONS: return 20;
default:
assert(false);
}
return -1;
case PIPE_STAT_QUERY_PS_INVOCATIONS: return 0;
case PIPE_STAT_QUERY_C_PRIMITIVES: return 2;
case PIPE_STAT_QUERY_C_INVOCATIONS: return 4;
case PIPE_STAT_QUERY_VS_INVOCATIONS: return 6;
case PIPE_STAT_QUERY_GS_INVOCATIONS: return 8;
case PIPE_STAT_QUERY_GS_PRIMITIVES: return 10;
case PIPE_STAT_QUERY_IA_PRIMITIVES: return 12;
case PIPE_STAT_QUERY_IA_VERTICES: return 14;
case PIPE_STAT_QUERY_HS_INVOCATIONS: return 16;
case PIPE_STAT_QUERY_DS_INVOCATIONS: return 18;
case PIPE_STAT_QUERY_CS_INVOCATIONS: return 20;
default:
assert(false);
}
/* Only reached for an unhandled index when assert() is a no-op. */
return ~0;
}
/* Dword offset of the end value for the given pipeline-statistics counter.
 * Computed as the counter's start-value offset plus two dwords for every
 * result reported by si_query_pipestats_num_results().
 */
unsigned si_query_pipestat_end_dw_offset(struct si_screen *sscreen,
                                         enum pipe_statistics_query_index index)
{
   const unsigned end_base_dw = 2 * si_query_pipestats_num_results(sscreen);
   const unsigned start_dw = si_query_pipestat_dw_offset(index);

   return end_base_dw + start_dw;
}
static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_query *squery,
@ -725,8 +733,7 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned
query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on GCN. */
query->result_size = 11 * 16;
query->result_size = si_query_pipestats_num_results(sscreen) * 16;
query->result_size += 8; /* for the fence + alignment */
query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
query->index = index;
@ -860,7 +867,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
const uint32_t zero = 0;
radeon_begin(cs);
/* Clear the emulated counter end value. We don't clear start because it's unused. */
va += (si_hw_query_dw_offset(query->index) + SI_QUERY_STATS_END_OFFSET_DW) * 4;
va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4;
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + 1, 0));
radeon_emit(S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(va);
@ -1322,9 +1329,9 @@ static void si_get_hw_query_params(struct si_context *sctx, struct si_query_hw *
params->fence_offset = squery->result_size - 4;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
params->start_offset = si_hw_query_dw_offset(index) * 4;
params->end_offset = SI_QUERY_STATS_END_OFFSET_DW * 4 + params->start_offset;
params->fence_offset = 2 * 88;
params->start_offset = si_query_pipestat_dw_offset(index) * 4;
params->end_offset = si_query_pipestat_end_dw_offset(sctx->screen, index) * 4;
params->fence_offset = si_query_pipestats_num_results(sctx->screen) * 16;
break;
}
default:
@ -1404,10 +1411,9 @@ static void si_query_hw_add_result(struct si_screen *sscreen, struct si_query_hw
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
for (int i = 0; i < 11; i++) {
int start_offset = si_hw_query_dw_offset(i);
result->pipeline_statistics.counters[i] +=
si_query_read_result(buffer, start_offset,
start_offset + SI_QUERY_STATS_END_OFFSET_DW, false);
si_query_read_result(buffer, si_query_pipestat_dw_offset(i),
si_query_pipestat_end_dw_offset(sscreen, i), false);
}
#if 0 /* for testing */
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "

View File

@ -127,14 +127,6 @@ enum
SI_NUM_SW_QUERY_GROUPS
};
/* The counters are stored in a buffer, each with a start and end value,
* with this layout:
* [start1][start2][...][startN][end1][end2][...][endN]
* N is 11 and each value is a 64-bit int so we get:
*/
#define SI_QUERY_STATS_END_OFFSET_DW (11 * 2)
int si_hw_query_dw_offset(int index);
struct si_query_ops {
void (*destroy)(struct si_context *, struct si_query *);
bool (*begin)(struct si_context *, struct si_query *);
@ -227,6 +219,8 @@ struct si_query_hw {
unsigned workaround_offset;
};
unsigned si_query_pipestat_end_dw_offset(struct si_screen *sscreen,
enum pipe_statistics_query_index index);
void si_query_hw_destroy(struct si_context *sctx, struct si_query *squery);
bool si_query_hw_begin(struct si_context *sctx, struct si_query *squery);
bool si_query_hw_end(struct si_context *sctx, struct si_query *squery);

View File

@ -245,8 +245,7 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
prim,
ngg_get_emulated_counters_buf(ctx),
LLVMConstInt(ctx->ac.i32,
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_PRIMITIVES) +
SI_QUERY_STATS_END_OFFSET_DW) * 4,
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
false),
ctx->ac.i32_0, /* soffset */
ctx->ac.i32_0, /* cachepolicy */
@ -255,9 +254,8 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
args[0] = ctx->ac.i32_1;
args[2] = LLVMConstInt(ctx->ac.i32,
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
SI_QUERY_STATS_END_OFFSET_DW) * 4,
false);
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
false);
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0);
}
ac_build_endif(&ctx->ac, 5229);