aco: return 0x76543210 for NULL FMASK fetch
This can replace several v_cndmask_b32 with a single v_cndmask_b32, and will be useful when we lower sample index adjustment in NIR. fossil-db (Sienna Cichlid): Totals from 955 (0.64% of 150170) affected shaders: VGPRs: 53232 -> 53208 (-0.05%) CodeSize: 4712548 -> 4722016 (+0.20%); split: -0.02%, +0.23% MaxWaves: 19052 -> 19056 (+0.02%) Instrs: 875891 -> 875619 (-0.03%); split: -0.04%, +0.00% Latency: 14070164 -> 14069089 (-0.01%); split: -0.02%, +0.01% InvThroughput: 2322982 -> 2321419 (-0.07%); split: -0.08%, +0.01% VClause: 23070 -> 23080 (+0.04%); split: -0.00%, +0.05% SClause: 32463 -> 32426 (-0.11%); split: -0.12%, +0.01% Copies: 42840 -> 42787 (-0.12%); split: -0.19%, +0.07% Branches: 17907 -> 17900 (-0.04%); split: -0.06%, +0.02% PreSGPRs: 43585 -> 43229 (-0.82%) PreVGPRs: 47676 -> 47625 (-0.11%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12214>
This commit is contained in:
parent
d30ed6ef8b
commit
bf0cc05227
|
@ -6025,6 +6025,17 @@ adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& c
|
|||
load->da = da;
|
||||
load->dim = dim;
|
||||
|
||||
/* Don't adjust the sample index if WORD1.DATA_FORMAT of the FMASK
|
||||
* resource descriptor is 0 (invalid),
|
||||
*/
|
||||
Temp is_not_null = bld.tmp(bld.lm);
|
||||
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
|
||||
emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
|
||||
.def(0)
|
||||
.setHint(vcc);
|
||||
fmask =
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0x76543210), fmask, is_not_null);
|
||||
|
||||
Operand sample_index4;
|
||||
if (sample_index.isConstant()) {
|
||||
if (sample_index.constantValue() < 16) {
|
||||
|
@ -6041,28 +6052,12 @@ adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& c
|
|||
bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), sample_index);
|
||||
}
|
||||
|
||||
Temp final_sample;
|
||||
if (sample_index4.isConstant() && sample_index4.constantValue() == 0)
|
||||
final_sample = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
|
||||
return bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
|
||||
else if (sample_index4.isConstant() && sample_index4.constantValue() == 28)
|
||||
final_sample = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
|
||||
return bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
|
||||
else
|
||||
final_sample =
|
||||
bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
|
||||
|
||||
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
|
||||
* resource descriptor is 0 (invalid),
|
||||
*/
|
||||
Temp compare = bld.tmp(bld.lm);
|
||||
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(compare), Operand::zero(),
|
||||
emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
|
||||
.def(0)
|
||||
.setHint(vcc);
|
||||
|
||||
Temp sample_index_v = bld.copy(bld.def(v1), sample_index);
|
||||
|
||||
/* Replace the MSAA sample index. */
|
||||
return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), sample_index_v, final_sample, compare);
|
||||
return bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
|
||||
}
|
||||
|
||||
static std::vector<Temp>
|
||||
|
@ -9556,7 +9551,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
dmask = 1 << instr->component;
|
||||
if (tg4_integer_cube_workaround || dst.type() == RegType::sgpr)
|
||||
tmp_dst = bld.tmp(instr->is_sparse ? v5 : v4);
|
||||
} else if (instr->op == nir_texop_samples_identical) {
|
||||
} else if (instr->op == nir_texop_samples_identical ||
|
||||
instr->op == nir_texop_fragment_mask_fetch_amd) {
|
||||
tmp_dst = bld.tmp(v1);
|
||||
} else if (util_bitcount(dmask) != instr->dest.ssa.num_components ||
|
||||
dst.type() == RegType::sgpr) {
|
||||
|
@ -9758,6 +9754,26 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
bld.vopc(aco_opcode::v_cmp_eq_u32, Definition(dst), Operand::zero(), tmp_dst)
|
||||
.def(0)
|
||||
.setHint(vcc);
|
||||
} else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
|
||||
/* Use 0x76543210 if the image doesn't have FMASK. */
|
||||
assert(dmask == 1 && dst.bytes() == 4);
|
||||
assert(dst.id() != tmp_dst.id());
|
||||
|
||||
if (dst.regClass() == s1) {
|
||||
Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
|
||||
emit_extract_vector(ctx, resource, 1, s1));
|
||||
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
|
||||
bld.as_uniform(tmp_dst), Operand::c32(0x76543210),
|
||||
bld.scc(is_not_null));
|
||||
} else {
|
||||
Temp is_not_null = bld.tmp(bld.lm);
|
||||
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
|
||||
emit_extract_vector(ctx, resource, 1, s1))
|
||||
.def(0)
|
||||
.setHint(vcc);
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst),
|
||||
bld.copy(bld.def(v1), Operand::c32(0x76543210)), tmp_dst, is_not_null);
|
||||
}
|
||||
} else {
|
||||
expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue