lima/ppir: lower fdot in nir_opt_algebraic
Now that we have fsum in nir, we can move fdot lowering there. This helps reduce ppir complexity and enables the lowered ops to be part of other nir optimizations in the optimization loop.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
This commit is contained in:
parent
4a407df682
commit
99c956fb47
|
@ -72,62 +72,6 @@ static bool ppir_lower_const(ppir_block *block, ppir_node *node)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* lower dot to mul+sum */
|
||||
static bool ppir_lower_dot(ppir_block *block, ppir_node *node)
|
||||
{
|
||||
ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, 0);
|
||||
if (!mul)
|
||||
return false;
|
||||
list_addtail(&mul->node.list, &node->list);
|
||||
|
||||
ppir_alu_node *dot = ppir_node_to_alu(node);
|
||||
mul->src[0] = dot->src[0];
|
||||
mul->src[1] = dot->src[1];
|
||||
mul->num_src = 2;
|
||||
|
||||
int num_components = node->op - ppir_op_dot2 + 2;
|
||||
ppir_dest *dest = &mul->dest;
|
||||
dest->type = ppir_target_ssa;
|
||||
dest->ssa.num_components = num_components;
|
||||
dest->ssa.live_in = INT_MAX;
|
||||
dest->ssa.live_out = 0;
|
||||
dest->write_mask = u_bit_consecutive(0, num_components);
|
||||
|
||||
ppir_node_foreach_pred_safe(node, dep) {
|
||||
ppir_node *pred = dep->pred;
|
||||
ppir_node_remove_dep(dep);
|
||||
ppir_node_add_dep(&mul->node, pred);
|
||||
}
|
||||
ppir_node_add_dep(node, &mul->node);
|
||||
|
||||
if (node->op == ppir_op_dot2) {
|
||||
node->op = ppir_op_add;
|
||||
|
||||
ppir_node_target_assign(dot->src, dest);
|
||||
dot->src[0].swizzle[0] = 0;
|
||||
dot->src[0].absolute = false;
|
||||
dot->src[0].negate = false;
|
||||
|
||||
ppir_node_target_assign(dot->src + 1, dest);
|
||||
dot->src[1].swizzle[0] = 1;
|
||||
dot->src[1].absolute = false;
|
||||
dot->src[1].negate = false;
|
||||
}
|
||||
else {
|
||||
node->op = node->op == ppir_op_dot3 ? ppir_op_sum3 : ppir_op_sum4;
|
||||
|
||||
ppir_node_target_assign(dot->src, dest);
|
||||
for (int i = 0; i < 4; i++)
|
||||
dot->src[0].swizzle[i] = i;
|
||||
dot->src[0].absolute = false;
|
||||
dot->src[0].negate = false;
|
||||
|
||||
dot->num_src = 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
|
||||
{
|
||||
ppir_reg *r = rzalloc(comp, ppir_reg);
|
||||
|
@ -458,9 +402,6 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
|
|||
[ppir_op_abs] = ppir_lower_abs,
|
||||
[ppir_op_neg] = ppir_lower_neg,
|
||||
[ppir_op_const] = ppir_lower_const,
|
||||
[ppir_op_dot2] = ppir_lower_dot,
|
||||
[ppir_op_dot3] = ppir_lower_dot,
|
||||
[ppir_op_dot4] = ppir_lower_dot,
|
||||
[ppir_op_rcp] = ppir_lower_vec_to_scalar,
|
||||
[ppir_op_rsqrt] = ppir_lower_vec_to_scalar,
|
||||
[ppir_op_log2] = ppir_lower_vec_to_scalar,
|
||||
|
|
|
@ -122,9 +122,8 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
|
|||
[nir_op_fabs] = ppir_op_abs,
|
||||
[nir_op_fneg] = ppir_op_neg,
|
||||
[nir_op_fadd] = ppir_op_add,
|
||||
[nir_op_fdot2] = ppir_op_dot2,
|
||||
[nir_op_fdot3] = ppir_op_dot3,
|
||||
[nir_op_fdot4] = ppir_op_dot4,
|
||||
[nir_op_fsum3] = ppir_op_sum3,
|
||||
[nir_op_fsum4] = ppir_op_sum4,
|
||||
[nir_op_frsq] = ppir_op_rsqrt,
|
||||
[nir_op_flog2] = ppir_op_log2,
|
||||
[nir_op_fexp2] = ppir_op_exp2,
|
||||
|
@ -173,13 +172,10 @@ static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
|
|||
|
||||
unsigned src_mask;
|
||||
switch (op) {
|
||||
case ppir_op_dot2:
|
||||
src_mask = 0b0011;
|
||||
break;
|
||||
case ppir_op_dot3:
|
||||
case ppir_op_sum3:
|
||||
src_mask = 0b0111;
|
||||
break;
|
||||
case ppir_op_dot4:
|
||||
case ppir_op_sum4:
|
||||
src_mask = 0b1111;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -60,15 +60,6 @@ const ppir_op_info ppir_op_infos[] = {
|
|||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_dot2] = {
|
||||
.name = "dot2",
|
||||
},
|
||||
[ppir_op_dot3] = {
|
||||
.name = "dot3",
|
||||
},
|
||||
[ppir_op_dot4] = {
|
||||
.name = "dot4",
|
||||
},
|
||||
[ppir_op_sum3] = {
|
||||
.name = "sum3",
|
||||
.slots = (int []) {
|
||||
|
|
|
@ -83,10 +83,6 @@ typedef enum {
|
|||
ppir_op_max,
|
||||
ppir_op_trunc,
|
||||
|
||||
ppir_op_dot2,
|
||||
ppir_op_dot3,
|
||||
ppir_op_dot4,
|
||||
|
||||
ppir_op_and,
|
||||
ppir_op_or,
|
||||
ppir_op_xor,
|
||||
|
|
|
@ -66,6 +66,7 @@ static const nir_shader_compiler_options fs_nir_options = {
|
|||
.lower_flrp64 = true,
|
||||
.lower_fsign = true,
|
||||
.lower_rotate = true,
|
||||
.lower_fdot = true,
|
||||
};
|
||||
|
||||
static const struct nir_lower_tex_options tex_options = {
|
||||
|
|
Loading…
Reference in New Issue