r300: move presubtract pass later

We want to move it after rc_remove_unused_constants because we can't
reswizzle presubtract sources, and we need to pack multiple uniforms into
vec4 slots to be able to run glamor shaders on R400.

This mostly works OK; the detection logic just needs to recognize the
case where 2.0 has been converted to an inline constant.

RV530:
total instructions in shared programs: 129384 -> 129366 (-0.01%)
instructions in affected programs: 2078 -> 2060 (-0.87%)
helped: 16
HURT: 7
total presub in shared programs: 8404 -> 8421 (0.20%)
presub in affected programs: 423 -> 440 (4.02%)
helped: 19
HURT: 25
total omod in shared programs: 410 -> 429 (4.63%)
omod in affected programs: 10 -> 29 (190.00%)
helped: 0
HURT: 13
total temps in shared programs: 17560 -> 17561 (<.01%)
temps in affected programs: 34 -> 35 (2.94%)
helped: 3
HURT: 3
total lits in shared programs: 3006 -> 3021 (0.50%)
lits in affected programs: 267 -> 282 (5.62%)
helped: 13
HURT: 20
total cycles in shared programs: 197840 -> 197845 (<.01%)
cycles in affected programs: 4159 -> 4164 (0.12%)
helped: 13
HURT: 13

R420:
total instructions in shared programs: 103810 -> 103762 (-0.05%)
instructions in affected programs: 4176 -> 4128 (-1.15%)
helped: 29
HURT: 1
total presub in shared programs: 2724 -> 2878 (5.65%)
presub in affected programs: 478 -> 632 (32.22%)
helped: 6
HURT: 40
total omod in shared programs: 411 -> 421 (2.43%)
omod in affected programs: 3 -> 13 (333.33%)
helped: 0
HURT: 7
total temps in shared programs: 16841 -> 16852 (0.07%)
temps in affected programs: 814 -> 825 (1.35%)
helped: 5
HURT: 17
total cycles in shared programs: 162397 -> 162361 (-0.02%)
cycles in affected programs: 5746 -> 5710 (-0.63%)
helped: 30
HURT: 3

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28428>
This commit is contained in:
Pavel Ondračka 2024-03-26 20:30:53 +01:00 committed by Marge Bot
parent a35a158b09
commit e34bb4738a
6 changed files with 42 additions and 32 deletions

View File

@ -121,6 +121,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ NULL, NULL }
};
struct radeon_program_transformation opt_presubtract[] = {
{ &rc_opt_presubtract, NULL },
{ NULL, NULL }
};
/* List of compiler passes. */
struct radeon_compiler_pass fs_list[] = {
/* NAME DUMP PREDICATE FUNCTION PARAM */
@ -137,6 +143,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"dataflow presubtract", 1, opt, rc_local_transform, opt_presubtract},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, &opt},
{"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL},

View File

@ -8,6 +8,8 @@
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
#include "r300_fragprog_swizzle.h"
#include "util/u_math.h"
/**
*/
unsigned int rc_swizzle_to_writemask(unsigned int swz)
@ -748,3 +750,16 @@ bool rc_inst_has_three_diff_temp_srcs(struct rc_instruction *inst)
inst->U.I.SrcReg[1].Index != inst->U.I.SrcReg[2].Index &&
inst->U.I.SrcReg[0].Index != inst->U.I.SrcReg[2].Index);
}
/**
 * Decode an inline-constant index into the float value it stands for.
 *
 * Only the low 7 bits of \p index are used: bits 3-6 hold an exponent stored
 * with a bias of 7, and bits 0-2 hold a 3-bit mantissa. The value is rebuilt
 * as a 32-bit pattern with the exponent re-biased by 127 at bit 23 and the
 * mantissa placed at bit 20, i.e. in the top three bits of the 23-bit
 * single-precision fraction. The sign bit is always left clear.
 *
 * \param index inline constant index as stored in a source register
 * \return the bit pattern converted to float through uif()
 */
float rc_inline_to_float(int index)
{
   const unsigned mantissa_bits = index & 0x7;
   /* Strip the 7 bias to get the true (signed) exponent. */
   const int true_exponent = ((index >> 3) & 0xf) - 7;
   /* Apply the 127 bias expected in the exponent field at bit 23. */
   const unsigned biased_exponent = true_exponent + 127;
   const unsigned ieee_bits = (biased_exponent << 23) | (mantissa_bits << 20);

   return uif(ieee_bits);
}

View File

@ -107,4 +107,6 @@ float rc_get_constant_value(
unsigned int rc_get_scalar_src_swz(unsigned int swizzle);
bool rc_inst_has_three_diff_temp_srcs(struct rc_instruction *inst);
/**
 * Convert an inline-constant index to the float it encodes.
 *
 * The index is read as a 7-bit value: a 4-bit exponent (bias 7) in bits 3-6
 * and a 3-bit mantissa in bits 0-2.
 */
float rc_inline_to_float(int index);
#endif /* RADEON_PROGRAM_UTIL_H */

View File

@ -111,5 +111,6 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
void rc_optimize(struct radeon_compiler * c, void *user);
void rc_inline_literals(struct radeon_compiler *c, void *user);
/**
 * Per-instruction presubtract peephole, usable as the function of a
 * radeon_program_transformation run through rc_local_transform().
 *
 * \return 0 if inst is still part of the program, 1 if inst is no longer
 *         part of the program.
 *
 * NOTE(review): \p data is passed as NULL by the fragment program compiler;
 * whether the function reads it is not visible from this declaration.
 */
int rc_opt_presubtract(struct radeon_compiler *c, struct rc_instruction *inst, void *data);
#endif /* RADEON_DATAFLOW_H */

View File

@ -623,15 +623,20 @@ static int peephole_mad_presub_bias(
struct rc_src_register src1_reg = inst_mad->U.I.SrcReg[1];
if ((src1_reg.Negate & inst_mad->U.I.DstReg.WriteMask) != 0 || src1_reg.Abs)
return 0;
struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index];
if (constant->Type != RC_CONSTANT_IMMEDIATE)
return 0;
for (i = 0; i < 4; i++) {
if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i)))
continue;
swz = GET_SWZ(src1_reg.Swizzle, i);
if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0)
if (src1_reg.File == RC_FILE_INLINE) {
if (rc_inline_to_float(src1_reg.Index) != 2.0f)
return 0;
} else {
struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index];
if (constant->Type != RC_CONSTANT_IMMEDIATE)
return 0;
for (i = 0; i < 4; i++) {
if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i)))
continue;
swz = GET_SWZ(src1_reg.Swizzle, i);
if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0)
return 0;
}
}
/* Check src0. */
@ -835,11 +840,9 @@ static int peephole_mul_omod(
* 0 if inst is still part of the program.
* 1 if inst is no longer part of the program.
*/
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
int
rc_opt_presubtract(struct radeon_compiler *c, struct rc_instruction *inst, void *data)
{
if (!c->has_presub)
return 0;
switch(inst->U.I.Opcode) {
case RC_OPCODE_ADD:
{
@ -1459,14 +1462,6 @@ void rc_optimize(struct radeon_compiler * c, void *user)
return;
}
/* Presubtract operations. */
inst = c->Program.Instructions.Next;
while(inst != &c->Program.Instructions) {
struct rc_instruction * cur = inst;
inst = inst->Next;
peephole(c, cur);
}
/* Output modifiers. */
inst = c->Program.Instructions.Next;
struct rc_list * var_list = NULL;

View File

@ -3,8 +3,8 @@
* SPDX-License-Identifier: MIT
*/
#include "util/u_math.h"
#include "radeon_program.h"
#include "radeon_compiler_util.h"
#include <stdio.h>
@ -95,17 +95,7 @@ static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func fun
static void rc_print_inline_float(FILE * f, int index)
{
int r300_exponent = (index >> 3) & 0xf;
unsigned r300_mantissa = index & 0x7;
unsigned float_exponent;
unsigned real_float;
r300_exponent -= 7;
float_exponent = r300_exponent + 127;
real_float = (r300_mantissa << 20) | (float_exponent << 23);
fprintf(f, "%f (0x%x)", uif(real_float), index);
fprintf(f, "%f (0x%x)", rc_inline_to_float(index), index);
}
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)