aco: remove pack_half_2x16(a, 0) optimization
This optimization makes the compiler less predictable, and removing it should
only have a very small effect on performance.
fossil-db (Vega):
Totals from 2410 (1.79% of 134756) affected shaders:
CodeSize: 6911568 -> 6942840 (+0.45%)
Fixes Horizon Zero Dawn artifacts.
If a shader has:
a = pack_half_2x16(a, 0) //rtne
store(pack_half_2x16(0, b) | a) //rtne
a = unpack_2x16(a).x
It will become:
store(pack_half_2x16(a, b)) //rtz
a = unpack_2x16(pack_half_2x16(a, 0)).x //rtne
So a later shader with "unpack_2x16(load()).x" will use "a" rounded toward
zero, while the previous shader will use "a" rounded to the nearest even, and
the two shaders end up with different values for "a".
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 2f125908b3 ("radv,aco: lower_pack_half_2x16")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14475>
This commit is contained in:
parent
6e08d8fc3d
commit
60c711833f
|
@ -3250,11 +3250,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||||
case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break;
|
case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break;
|
||||||
case nir_op_pack_half_2x16_split: {
|
case nir_op_pack_half_2x16_split: {
|
||||||
if (dst.regClass() == v1) {
|
if (dst.regClass() == v1) {
|
||||||
nir_const_value* val = nir_src_as_const_value(instr->src[1].src);
|
if (!ctx->block->fp_mode.care_about_round16_64 ||
|
||||||
if (val && val->u32 == 0 && ctx->program->chip_class <= GFX9) {
|
|
||||||
/* upper bits zero on GFX6-GFX9 */
|
|
||||||
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0]));
|
|
||||||
} else if (!ctx->block->fp_mode.care_about_round16_64 ||
|
|
||||||
ctx->block->fp_mode.round16_64 == fp_round_tz) {
|
ctx->block->fp_mode.round16_64 == fp_round_tz) {
|
||||||
if (ctx->program->chip_class == GFX8 || ctx->program->chip_class == GFX9)
|
if (ctx->program->chip_class == GFX8 || ctx->program->chip_class == GFX9)
|
||||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
|
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
|
||||||
|
|
Loading…
Reference in New Issue