aco: Extract merged_wave_info_to_mask to its own function.

Currently we only use this at the beginning of merged shader parts,
but we are going to need to use it with some NGG code as well.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3576>
This commit is contained in:
Timur Kristóf 2020-03-31 10:49:52 +02:00 committed by Marge Bot
parent 90b1047fdf
commit d345bfe195
1 changed files with 26 additions and 19 deletions

View File

@ -10254,6 +10254,31 @@ void cleanup_cfg(Program *program)
}
}
/**
 * Emits the instructions that turn one field of the merged_wave_info shader
 * argument into a lane mask, and returns the temporary holding that mask.
 *
 * ctx  Instruction selection context; new instructions are built on
 *      ctx->program / ctx->block.
 * i    Field selector: for i != 0 the argument is shifted right by i * 8
 *      bits before being used as the count (presumably i is the index of
 *      the merged shader part -- confirm against the callers).
 *
 * The returned temporary is defined with the builder's lane-mask register
 * class (bld.lm).
 */
Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i)
{
   Builder bld(ctx->program, ctx->block);

   /* s_bfm only looks at s0.u[5:0], so the count needs neither s_bfe nor s_and. */
   Temp lane_count;
   if (i == 0) {
      lane_count = get_arg(ctx, ctx->args->merged_wave_info);
   } else {
      lane_count = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
                            get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u));
   }

   /* Bitfield mask of lane_count bits starting at bit 0. */
   Temp bitfield = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), lane_count, Operand(0u));

   if (ctx->program->wave_size != 64) {
      /* s_bfm_b64 (rather than _b32) also copes with a count of 32; only the
       * lower half of the 64-bit result is wanted here.
       */
      return emit_extract_vector(ctx, bitfield, 0, bld.lm);
   }

   /* A count of 64 does not work with s_bfm, so test bit 6 (log2(64)) of the
    * count and select an all-ones mask when it is set.
    */
   Temp all_lanes_active = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc),
                                    lane_count, Operand(6u /* log2(64) */));
   return bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), bitfield,
                   bld.scc(all_lanes_active));
}
void select_program(Program *program,
unsigned shader_count,
struct nir_shader *const *shaders,
@ -10291,25 +10316,7 @@ void select_program(Program *program,
bool check_merged_wave_info = ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader);
bool endif_merged_wave_info = ctx.tcs_in_out_eq ? i == 1 : check_merged_wave_info;
if (check_merged_wave_info) {
Builder bld(ctx.program, ctx.block);
/* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */
Temp count = i == 0 ? get_arg(&ctx, args->merged_wave_info)
: bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
get_arg(&ctx, args->merged_wave_info), Operand(i * 8u));
Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u));
Temp cond;
if (ctx.program->wave_size == 64) {
/* Special case for 64 active invocations, because 64 doesn't work with s_bfm */
Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */));
cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64));
} else {
/* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */
cond = emit_extract_vector(&ctx, mask, 0, bld.lm);
}
Temp cond = merged_wave_info_to_mask(&ctx, i);
begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond);
}