radeonsi: simplify how pipeline statistic offsets are computed
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
parent
57b7dcd9db
commit
928e5f240d
|
@ -1864,8 +1864,7 @@ void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
|
|||
ctx->ac.i32_1,
|
||||
ngg_get_emulated_counters_buf(ctx),
|
||||
LLVMConstInt(ctx->ac.i32,
|
||||
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
|
||||
SI_QUERY_STATS_END_OFFSET_DW) * 4,
|
||||
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
|
||||
false),
|
||||
ctx->ac.i32_0, /* soffset */
|
||||
ctx->ac.i32_0, /* cachepolicy */
|
||||
|
@ -2195,8 +2194,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx)
|
|||
ctx->ac.i32_1,
|
||||
ngg_get_emulated_counters_buf(ctx),
|
||||
LLVMConstInt(ctx->ac.i32,
|
||||
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_PRIMITIVES) +
|
||||
SI_QUERY_STATS_END_OFFSET_DW) * 4,
|
||||
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
|
||||
false),
|
||||
ctx->ac.i32_0, /* soffset */
|
||||
ctx->ac.i32_0, /* cachepolicy */
|
||||
|
|
|
@ -637,27 +637,35 @@ static bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_
|
|||
return true;
|
||||
}
|
||||
|
||||
int si_hw_query_dw_offset(int index)
|
||||
/**
 * Return the number of counters stored by a pipeline-statistics query.
 *
 * \param sscreen  screen the query belongs to; currently not consulted,
 *                 the same count is returned for every screen
 * \return number of pipeline-statistics results (11 values on GCN)
 */
static unsigned si_query_pipestats_num_results(struct si_screen *sscreen)
{
   /* 11 values on GCN. */
   const unsigned num_counters = 11;

   (void)sscreen; /* intentionally unused: the count does not depend on the screen */
   return num_counters;
}
|
||||
|
||||
static unsigned si_query_pipestat_dw_offset(enum pipe_statistics_query_index index)
|
||||
{
|
||||
/* Offset in dwords in the query buffer of the start value
|
||||
* for the given counter.
|
||||
*/
|
||||
switch (index) {
|
||||
case PIPE_STAT_QUERY_IA_VERTICES: return 14;
|
||||
case PIPE_STAT_QUERY_IA_PRIMITIVES: return 12;
|
||||
case PIPE_STAT_QUERY_VS_INVOCATIONS: return 6;
|
||||
case PIPE_STAT_QUERY_GS_INVOCATIONS: return 8;
|
||||
case PIPE_STAT_QUERY_GS_PRIMITIVES: return 10;
|
||||
case PIPE_STAT_QUERY_C_INVOCATIONS: return 4;
|
||||
case PIPE_STAT_QUERY_C_PRIMITIVES: return 2;
|
||||
case PIPE_STAT_QUERY_PS_INVOCATIONS: return 0;
|
||||
case PIPE_STAT_QUERY_HS_INVOCATIONS: return 16;
|
||||
case PIPE_STAT_QUERY_DS_INVOCATIONS: return 18;
|
||||
case PIPE_STAT_QUERY_CS_INVOCATIONS: return 20;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
return -1;
|
||||
case PIPE_STAT_QUERY_PS_INVOCATIONS: return 0;
|
||||
case PIPE_STAT_QUERY_C_PRIMITIVES: return 2;
|
||||
case PIPE_STAT_QUERY_C_INVOCATIONS: return 4;
|
||||
case PIPE_STAT_QUERY_VS_INVOCATIONS: return 6;
|
||||
case PIPE_STAT_QUERY_GS_INVOCATIONS: return 8;
|
||||
case PIPE_STAT_QUERY_GS_PRIMITIVES: return 10;
|
||||
case PIPE_STAT_QUERY_IA_PRIMITIVES: return 12;
|
||||
case PIPE_STAT_QUERY_IA_VERTICES: return 14;
|
||||
case PIPE_STAT_QUERY_HS_INVOCATIONS: return 16;
|
||||
case PIPE_STAT_QUERY_DS_INVOCATIONS: return 18;
|
||||
case PIPE_STAT_QUERY_CS_INVOCATIONS: return 20;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
return ~0;
|
||||
}
|
||||
|
||||
unsigned si_query_pipestat_end_dw_offset(struct si_screen *sscreen,
|
||||
enum pipe_statistics_query_index index)
|
||||
{
|
||||
return si_query_pipestats_num_results(sscreen) * 2 + si_query_pipestat_dw_offset(index);
|
||||
}
|
||||
|
||||
static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_query *squery,
|
||||
|
@ -725,8 +733,7 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned
|
|||
query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS;
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
/* 11 values on GCN. */
|
||||
query->result_size = 11 * 16;
|
||||
query->result_size = si_query_pipestats_num_results(sscreen) * 16;
|
||||
query->result_size += 8; /* for the fence + alignment */
|
||||
query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
|
||||
query->index = index;
|
||||
|
@ -860,7 +867,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
|
|||
const uint32_t zero = 0;
|
||||
radeon_begin(cs);
|
||||
/* Clear the emulated counter end value. We don't clear start because it's unused. */
|
||||
va += (si_hw_query_dw_offset(query->index) + SI_QUERY_STATS_END_OFFSET_DW) * 4;
|
||||
va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4;
|
||||
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + 1, 0));
|
||||
radeon_emit(S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
|
||||
radeon_emit(va);
|
||||
|
@ -1322,9 +1329,9 @@ static void si_get_hw_query_params(struct si_context *sctx, struct si_query_hw *
|
|||
params->fence_offset = squery->result_size - 4;
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS: {
|
||||
params->start_offset = si_hw_query_dw_offset(index) * 4;
|
||||
params->end_offset = SI_QUERY_STATS_END_OFFSET_DW * 4 + params->start_offset;
|
||||
params->fence_offset = 2 * 88;
|
||||
params->start_offset = si_query_pipestat_dw_offset(index) * 4;
|
||||
params->end_offset = si_query_pipestat_end_dw_offset(sctx->screen, index) * 4;
|
||||
params->fence_offset = si_query_pipestats_num_results(sctx->screen) * 16;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -1404,10 +1411,9 @@ static void si_query_hw_add_result(struct si_screen *sscreen, struct si_query_hw
|
|||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
for (int i = 0; i < 11; i++) {
|
||||
int start_offset = si_hw_query_dw_offset(i);
|
||||
result->pipeline_statistics.counters[i] +=
|
||||
si_query_read_result(buffer, start_offset,
|
||||
start_offset + SI_QUERY_STATS_END_OFFSET_DW, false);
|
||||
si_query_read_result(buffer, si_query_pipestat_dw_offset(i),
|
||||
si_query_pipestat_end_dw_offset(sscreen, i), false);
|
||||
}
|
||||
#if 0 /* for testing */
|
||||
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
|
||||
|
|
|
@ -127,14 +127,6 @@ enum
|
|||
SI_NUM_SW_QUERY_GROUPS
|
||||
};
|
||||
|
||||
/* The counters are stored in a buffer, each with a start and end value,
|
||||
* with this layout:
|
||||
* [start1][start2][...][startN][end1][end2][...][endN]
|
||||
* N is 11 and each value is a 64-bit int so we get:
|
||||
*/
|
||||
#define SI_QUERY_STATS_END_OFFSET_DW (11 * 2)
|
||||
int si_hw_query_dw_offset(int index);
|
||||
|
||||
struct si_query_ops {
|
||||
void (*destroy)(struct si_context *, struct si_query *);
|
||||
bool (*begin)(struct si_context *, struct si_query *);
|
||||
|
@ -227,6 +219,8 @@ struct si_query_hw {
|
|||
unsigned workaround_offset;
|
||||
};
|
||||
|
||||
unsigned si_query_pipestat_end_dw_offset(struct si_screen *sscreen,
|
||||
enum pipe_statistics_query_index index);
|
||||
void si_query_hw_destroy(struct si_context *sctx, struct si_query *squery);
|
||||
bool si_query_hw_begin(struct si_context *sctx, struct si_query *squery);
|
||||
bool si_query_hw_end(struct si_context *sctx, struct si_query *squery);
|
||||
|
|
|
@ -245,8 +245,7 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
|
|||
prim,
|
||||
ngg_get_emulated_counters_buf(ctx),
|
||||
LLVMConstInt(ctx->ac.i32,
|
||||
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_PRIMITIVES) +
|
||||
SI_QUERY_STATS_END_OFFSET_DW) * 4,
|
||||
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
|
||||
false),
|
||||
ctx->ac.i32_0, /* soffset */
|
||||
ctx->ac.i32_0, /* cachepolicy */
|
||||
|
@ -255,9 +254,8 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
|
|||
|
||||
args[0] = ctx->ac.i32_1;
|
||||
args[2] = LLVMConstInt(ctx->ac.i32,
|
||||
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
|
||||
SI_QUERY_STATS_END_OFFSET_DW) * 4,
|
||||
false);
|
||||
si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
|
||||
false);
|
||||
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0);
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 5229);
|
||||
|
|
Loading…
Reference in New Issue