aco: return 0x76543210 for NULL FMASK fetch

This can replace several v_cndmask_b32 with a single v_cndmask_b32, and
will be useful when we lower sample index adjustment in NIR.

fossil-db (Sienna Cichlid):
Totals from 955 (0.64% of 150170) affected shaders:
VGPRs: 53232 -> 53208 (-0.05%)
CodeSize: 4712548 -> 4722016 (+0.20%); split: -0.02%, +0.23%
MaxWaves: 19052 -> 19056 (+0.02%)
Instrs: 875891 -> 875619 (-0.03%); split: -0.04%, +0.00%
Latency: 14070164 -> 14069089 (-0.01%); split: -0.02%, +0.01%
InvThroughput: 2322982 -> 2321419 (-0.07%); split: -0.08%, +0.01%
VClause: 23070 -> 23080 (+0.04%); split: -0.00%, +0.05%
SClause: 32463 -> 32426 (-0.11%); split: -0.12%, +0.01%
Copies: 42840 -> 42787 (-0.12%); split: -0.19%, +0.07%
Branches: 17907 -> 17900 (-0.04%); split: -0.06%, +0.02%
PreSGPRs: 43585 -> 43229 (-0.82%)
PreVGPRs: 47676 -> 47625 (-0.11%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12214>
This commit is contained in:
Rhys Perry 2021-08-04 16:13:47 +01:00 committed by Marge Bot
parent d30ed6ef8b
commit bf0cc05227
1 changed files with 36 additions and 20 deletions

View File

@ -6025,6 +6025,17 @@ adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& c
load->da = da;
load->dim = dim;
/* Don't adjust the sample index if WORD1.DATA_FORMAT of the FMASK
* resource descriptor is 0 (invalid),
*/
Temp is_not_null = bld.tmp(bld.lm);
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
.def(0)
.setHint(vcc);
fmask =
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0x76543210), fmask, is_not_null);
Operand sample_index4;
if (sample_index.isConstant()) {
if (sample_index.constantValue() < 16) {
@ -6041,28 +6052,12 @@ adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& c
bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), sample_index);
}
Temp final_sample;
if (sample_index4.isConstant() && sample_index4.constantValue() == 0)
final_sample = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
return bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
else if (sample_index4.isConstant() && sample_index4.constantValue() == 28)
final_sample = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
return bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
else
final_sample =
bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
* resource descriptor is 0 (invalid),
*/
Temp compare = bld.tmp(bld.lm);
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(compare), Operand::zero(),
emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
.def(0)
.setHint(vcc);
Temp sample_index_v = bld.copy(bld.def(v1), sample_index);
/* Replace the MSAA sample index. */
return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), sample_index_v, final_sample, compare);
return bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
}
static std::vector<Temp>
@ -9556,7 +9551,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
dmask = 1 << instr->component;
if (tg4_integer_cube_workaround || dst.type() == RegType::sgpr)
tmp_dst = bld.tmp(instr->is_sparse ? v5 : v4);
} else if (instr->op == nir_texop_samples_identical) {
} else if (instr->op == nir_texop_samples_identical ||
instr->op == nir_texop_fragment_mask_fetch_amd) {
tmp_dst = bld.tmp(v1);
} else if (util_bitcount(dmask) != instr->dest.ssa.num_components ||
dst.type() == RegType::sgpr) {
@ -9758,6 +9754,26 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
bld.vopc(aco_opcode::v_cmp_eq_u32, Definition(dst), Operand::zero(), tmp_dst)
.def(0)
.setHint(vcc);
} else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
/* Use 0x76543210 if the image doesn't have FMASK. */
assert(dmask == 1 && dst.bytes() == 4);
assert(dst.id() != tmp_dst.id());
if (dst.regClass() == s1) {
Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
emit_extract_vector(ctx, resource, 1, s1));
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
bld.as_uniform(tmp_dst), Operand::c32(0x76543210),
bld.scc(is_not_null));
} else {
Temp is_not_null = bld.tmp(bld.lm);
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
emit_extract_vector(ctx, resource, 1, s1))
.def(0)
.setHint(vcc);
bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst),
bld.copy(bld.def(v1), Operand::c32(0x76543210)), tmp_dst, is_not_null);
}
} else {
expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
}