freedreno/ir3/cf: handle widening too

We can also fold f16->f32 conversions.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4423>
This commit is contained in:
Rob Clark 2020-03-16 06:47:05 -07:00 committed by Marge Bot
parent bf64648864
commit e73a8a9703
1 changed files with 32 additions and 6 deletions

View File

@ -26,11 +26,26 @@
static bool
is_fp16_conv(struct ir3_instruction *instr)
{
if (instr->opc == OPC_MOV &&
instr->cat1.src_type == TYPE_F32 &&
if (instr->opc != OPC_MOV)
return false;
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
/* disallow conversions that cannot be folded into
* alu instructions:
*/
if (dst->flags & (IR3_REG_EVEN | IR3_REG_POS_INF))
return false;
if (instr->cat1.src_type == TYPE_F32 &&
instr->cat1.dst_type == TYPE_F16)
return true;
if (instr->cat1.src_type == TYPE_F16 &&
instr->cat1.dst_type == TYPE_F32)
return true;
return false;
}
@ -77,8 +92,15 @@ try_conversion_folding(struct ir3 *ir, struct ir3_instruction *conv)
if (!is_alu(src))
return;
/* avoid folding f2f32(f2f16) together, in cases where this is legal to
* do (glsl) nir should have handled that for us already:
*/
if (is_fp16_conv(src))
return;
switch (src->opc) {
case OPC_SEL_B32:
case OPC_SEL_B16:
case OPC_MAX_F:
case OPC_MIN_F:
case OPC_SIGN_F:
@ -97,18 +119,22 @@ try_conversion_folding(struct ir3 *ir, struct ir3_instruction *conv)
* change the dst type to F32 to get the right behavior, since we
* could be moving a float with a u32.u32 move.
*/
src->cat1.dst_type = TYPE_F16;
src->cat1.src_type = TYPE_F32;
src->cat1.dst_type = conv->cat1.dst_type;
src->cat1.src_type = conv->cat1.src_type;
} else {
/* Otherwise, for typechanging movs, we can just change the dst
* type to F16 to collaps the two conversions. For example
* cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16.
*/
src->cat1.dst_type = TYPE_F16;
src->cat1.dst_type = conv->cat1.dst_type;
}
}
src->regs[0]->flags |= IR3_REG_HALF;
if (conv->regs[0]->flags & IR3_REG_HALF) {
src->regs[0]->flags |= IR3_REG_HALF;
} else {
src->regs[0]->flags &= ~IR3_REG_HALF;
}
rewrite_uses(ir, conv, src);
}