lima/ppir: lower fdot in nir_opt_algebraic

Now that NIR has the fsum opcodes, fdot lowering can be moved out of ppir
and into NIR's nir_opt_algebraic (enabled with the lower_fdot option).
This helps reduce ppir complexity and lets the lowered ops take part in
the other NIR optimizations that run in the optimization loop.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
This commit is contained in:
Erico Nunes 2019-07-27 18:10:46 +02:00
parent 4a407df682
commit 99c956fb47
5 changed files with 5 additions and 80 deletions

View File

@ -72,62 +72,6 @@ static bool ppir_lower_const(ppir_block *block, ppir_node *node)
return true;
}
/*
 * Lower a dot2/dot3/dot4 node into a component-wise multiply followed by
 * a reduction.
 *
 * A new mul node is created and linked into the node list with
 * list_addtail() on the dot node's list entry. It takes over both of the
 * dot node's sources and all of its predecessor dependencies. The dot
 * node is then rewritten in place to consume only the mul result:
 *   - dot2 becomes an add of components 0 and 1 of the product;
 *   - dot3/dot4 become sum3/sum4 with the product as the single source.
 *
 * Returns false when the mul node cannot be created, true otherwise.
 */
static bool ppir_lower_dot(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *product = ppir_node_create(block, ppir_op_mul, -1, 0);
   if (!product)
      return false;
   list_addtail(&product->node.list, &node->list);

   /* The multiply receives the dot's two operands as they are,
    * including whatever modifiers are stored in the source structs. */
   ppir_alu_node *alu = ppir_node_to_alu(node);
   product->num_src = 2;
   product->src[0] = alu->src[0];
   product->src[1] = alu->src[1];

   /* Offset from dot2 gives the vector width: dot2 -> 2, dot3 -> 3, ... */
   int width = node->op - ppir_op_dot2 + 2;

   /* The product is written to a fresh SSA destination, one component
    * per multiplied lane. */
   ppir_dest *product_dest = &product->dest;
   product_dest->type = ppir_target_ssa;
   product_dest->write_mask = u_bit_consecutive(0, width);
   product_dest->ssa.num_components = width;
   product_dest->ssa.live_in = INT_MAX;
   product_dest->ssa.live_out = 0;

   /* Move every predecessor dependency of the dot onto the multiply ... */
   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(&product->node, pred);
   }
   /* ... then make the dot depend on the multiply. */
   ppir_node_add_dep(node, &product->node);

   if (node->op != ppir_op_dot2) {
      /* dot3/dot4: one sum3/sum4 over the product, read with an
       * identity swizzle. The modifiers were copied to the multiply
       * with the operands, so they are cleared on the reduction. */
      node->op = (node->op == ppir_op_dot3) ? ppir_op_sum3 : ppir_op_sum4;
      ppir_node_target_assign(&alu->src[0], product_dest);
      for (int c = 0; c < 4; c++)
         alu->src[0].swizzle[c] = c;
      alu->src[0].absolute = false;
      alu->src[0].negate = false;
      alu->num_src = 1;
      return true;
   }

   /* dot2: add component 0 and component 1 of the product. Both add
    * sources point at the product, each selecting one component, with
    * the modifiers cleared. */
   node->op = ppir_op_add;
   for (int s = 0; s < 2; s++) {
      ppir_node_target_assign(&alu->src[s], product_dest);
      alu->src[s].swizzle[0] = s;
      alu->src[s].absolute = false;
      alu->src[s].negate = false;
   }
   return true;
}
static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
{
ppir_reg *r = rzalloc(comp, ppir_reg);
@ -458,9 +402,6 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
[ppir_op_abs] = ppir_lower_abs,
[ppir_op_neg] = ppir_lower_neg,
[ppir_op_const] = ppir_lower_const,
[ppir_op_dot2] = ppir_lower_dot,
[ppir_op_dot3] = ppir_lower_dot,
[ppir_op_dot4] = ppir_lower_dot,
[ppir_op_rcp] = ppir_lower_vec_to_scalar,
[ppir_op_rsqrt] = ppir_lower_vec_to_scalar,
[ppir_op_log2] = ppir_lower_vec_to_scalar,

View File

@ -122,9 +122,8 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
[nir_op_fabs] = ppir_op_abs,
[nir_op_fneg] = ppir_op_neg,
[nir_op_fadd] = ppir_op_add,
[nir_op_fdot2] = ppir_op_dot2,
[nir_op_fdot3] = ppir_op_dot3,
[nir_op_fdot4] = ppir_op_dot4,
[nir_op_fsum3] = ppir_op_sum3,
[nir_op_fsum4] = ppir_op_sum4,
[nir_op_frsq] = ppir_op_rsqrt,
[nir_op_flog2] = ppir_op_log2,
[nir_op_fexp2] = ppir_op_exp2,
@ -173,13 +172,10 @@ static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
unsigned src_mask;
switch (op) {
case ppir_op_dot2:
src_mask = 0b0011;
break;
case ppir_op_dot3:
case ppir_op_sum3:
src_mask = 0b0111;
break;
case ppir_op_dot4:
case ppir_op_sum4:
src_mask = 0b1111;
break;
default:

View File

@ -60,15 +60,6 @@ const ppir_op_info ppir_op_infos[] = {
PPIR_INSTR_SLOT_END
},
},
[ppir_op_dot2] = {
.name = "dot2",
},
[ppir_op_dot3] = {
.name = "dot3",
},
[ppir_op_dot4] = {
.name = "dot4",
},
[ppir_op_sum3] = {
.name = "sum3",
.slots = (int []) {

View File

@ -83,10 +83,6 @@ typedef enum {
ppir_op_max,
ppir_op_trunc,
ppir_op_dot2,
ppir_op_dot3,
ppir_op_dot4,
ppir_op_and,
ppir_op_or,
ppir_op_xor,

View File

@ -66,6 +66,7 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_flrp64 = true,
.lower_fsign = true,
.lower_rotate = true,
.lower_fdot = true,
};
static const struct nir_lower_tex_options tex_options = {