From e34bb4738aa3b69da7abd5e91d8933ead2d0acdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Tue, 26 Mar 2024 20:30:53 +0100 Subject: [PATCH] r300: move presubtract pass later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to move it after rc_remove_constants because we can't reswizzle presubtract sources and we need to pack multiple uniforms into vec4 slots to be able to run glamor shaders on R400. This mostly works OK; the detection logic just needs to recognize the case where 2.0 was converted to an inline constant. RV530: total instructions in shared programs: 129384 -> 129366 (-0.01%) instructions in affected programs: 2078 -> 2060 (-0.87%) helped: 16 HURT: 7 total presub in shared programs: 8404 -> 8421 (0.20%) presub in affected programs: 423 -> 440 (4.02%) helped: 19 HURT: 25 total omod in shared programs: 410 -> 429 (4.63%) omod in affected programs: 10 -> 29 (190.00%) helped: 0 HURT: 13 total temps in shared programs: 17560 -> 17561 (<.01%) temps in affected programs: 34 -> 35 (2.94%) helped: 3 HURT: 3 total lits in shared programs: 3006 -> 3021 (0.50%) lits in affected programs: 267 -> 282 (5.62%) helped: 13 HURT: 20 total cycles in shared programs: 197840 -> 197845 (<.01%) cycles in affected programs: 4159 -> 4164 (0.12%) helped: 13 HURT: 13 R420: total instructions in shared programs: 103810 -> 103762 (-0.05%) instructions in affected programs: 4176 -> 4128 (-1.15%) helped: 29 HURT: 1 total presub in shared programs: 2724 -> 2878 (5.65%) presub in affected programs: 478 -> 632 (32.22%) helped: 6 HURT: 40 total omod in shared programs: 411 -> 421 (2.43%) omod in affected programs: 3 -> 13 (333.33%) helped: 0 HURT: 7 total temps in shared programs: 16841 -> 16852 (0.07%) temps in affected programs: 814 -> 825 (1.35%) helped: 5 HURT: 17 total cycles in shared programs: 162397 -> 162361 (-0.02%) cycles in affected programs: 5746 -> 5710 (-0.63%) helped: 30 HURT: 3 Signed-off-by: Pavel 
Ondračka Part-of: --- .../drivers/r300/compiler/r3xx_fragprog.c | 7 ++++ .../r300/compiler/radeon_compiler_util.c | 15 ++++++++ .../r300/compiler/radeon_compiler_util.h | 2 ++ .../drivers/r300/compiler/radeon_dataflow.h | 1 + .../drivers/r300/compiler/radeon_optimize.c | 35 ++++++++----------- .../r300/compiler/radeon_program_print.c | 14 ++------ 6 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c index 8968848dab2fe..d530ca085090d 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -121,6 +121,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { NULL, NULL } }; + struct radeon_program_transformation opt_presubtract[] = { + { &rc_opt_presubtract, NULL }, + { NULL, NULL } + }; + + /* List of compiler passes. */ struct radeon_compiler_pass fs_list[] = { /* NAME DUMP PREDICATE FUNCTION PARAM */ @@ -137,6 +143,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL}, {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dataflow presubtract", 1, opt, rc_local_transform, opt_presubtract}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, &opt}, {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c index f6ecd835c34c0..5038b679e888b 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c @@ -8,6 +8,8 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" #include "r300_fragprog_swizzle.h" + +#include "util/u_math.h" /** 
*/ unsigned int rc_swizzle_to_writemask(unsigned int swz) @@ -748,3 +750,16 @@ bool rc_inst_has_three_diff_temp_srcs(struct rc_instruction *inst) inst->U.I.SrcReg[1].Index != inst->U.I.SrcReg[2].Index && inst->U.I.SrcReg[0].Index != inst->U.I.SrcReg[2].Index); } + +float rc_inline_to_float(int index) +{ + int r300_exponent = (index >> 3) & 0xf; + unsigned r300_mantissa = index & 0x7; + unsigned float_exponent; + unsigned real_float; + + r300_exponent -= 7; + float_exponent = r300_exponent + 127; + real_float = (r300_mantissa << 20) | (float_exponent << 23); + return uif(real_float); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h index 71436bf1cbf33..91c4aa27c2c76 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h @@ -107,4 +107,6 @@ float rc_get_constant_value( unsigned int rc_get_scalar_src_swz(unsigned int swizzle); bool rc_inst_has_three_diff_temp_srcs(struct rc_instruction *inst); + +float rc_inline_to_float(int index); #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h index 0f3618d7274d7..29ba9a8b37070 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.h +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -111,5 +111,6 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); void rc_optimize(struct radeon_compiler * c, void *user); void rc_inline_literals(struct radeon_compiler *c, void *user); +int rc_opt_presubtract(struct radeon_compiler *c, struct rc_instruction *inst, void *data); #endif /* RADEON_DATAFLOW_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 87ed650b54572..f11462b36ceee 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ 
b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -623,15 +623,20 @@ static int peephole_mad_presub_bias( struct rc_src_register src1_reg = inst_mad->U.I.SrcReg[1]; if ((src1_reg.Negate & inst_mad->U.I.DstReg.WriteMask) != 0 || src1_reg.Abs) return 0; - struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index]; - if (constant->Type != RC_CONSTANT_IMMEDIATE) - return 0; - for (i = 0; i < 4; i++) { - if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) - continue; - swz = GET_SWZ(src1_reg.Swizzle, i); - if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0) + if (src1_reg.File == RC_FILE_INLINE) { + if (rc_inline_to_float(src1_reg.Index) != 2.0f) + return 0; + } else { + struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index]; + if (constant->Type != RC_CONSTANT_IMMEDIATE) return 0; + for (i = 0; i < 4; i++) { + if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) + continue; + swz = GET_SWZ(src1_reg.Swizzle, i); + if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0) + return 0; + } } /* Check src0. */ @@ -835,11 +840,9 @@ static int peephole_mul_omod( * 0 if inst is still part of the program. * 1 if inst is no longer part of the program. */ -static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) +int +rc_opt_presubtract(struct radeon_compiler *c, struct rc_instruction *inst, void *data) { - if (!c->has_presub) - return 0; - switch(inst->U.I.Opcode) { case RC_OPCODE_ADD: { @@ -1459,14 +1462,6 @@ void rc_optimize(struct radeon_compiler * c, void *user) return; } - /* Presubtract operations. */ - inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - peephole(c, cur); - } - /* Output modifiers. 
*/ inst = c->Program.Instructions.Next; struct rc_list * var_list = NULL; diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c index 124d0574716c3..4da2c04cb5021 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_print.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c @@ -3,8 +3,8 @@ * SPDX-License-Identifier: MIT */ -#include "util/u_math.h" #include "radeon_program.h" +#include "radeon_compiler_util.h" #include @@ -95,17 +95,7 @@ static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func fun static void rc_print_inline_float(FILE * f, int index) { - int r300_exponent = (index >> 3) & 0xf; - unsigned r300_mantissa = index & 0x7; - unsigned float_exponent; - unsigned real_float; - - r300_exponent -= 7; - float_exponent = r300_exponent + 127; - real_float = (r300_mantissa << 20) | (float_exponent << 23); - - fprintf(f, "%f (0x%x)", uif(real_float), index); - + fprintf(f, "%f (0x%x)", rc_inline_to_float(index), index); } static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)