freedreno/ir3: don't lower fsat

Instead, if possible, fold the (sat) flag into the src instruction; otherwise use:

  (sat)max.f rD, rS, rS

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2018-01-21 12:31:51 -05:00
parent b2fc94f074
commit 942341bcd0
3 changed files with 23 additions and 1 deletions

View File

@ -616,6 +616,8 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
break;
case OPC_ABSNEG_F:
case OPC_ABSNEG_S:
if (instr->flags & IR3_INSTR_SAT)
return false;
break;
default:
return false;

View File

@ -964,6 +964,27 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
case nir_op_fmin:
dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0);
break;
case nir_op_fsat:
/* if there is just a single use of the src, and it supports
* (sat) bit, we can just fold the (sat) flag back to the
* src instruction and create a mov. This is easier for cp
* to eliminate.
*
* TODO probably opc_cat==4 is ok too
*/
if (alu->src[0].src.is_ssa &&
(list_length(&alu->src[0].src.ssa->uses) == 1) &&
((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) {
src[0]->flags |= IR3_INSTR_SAT;
dst[0] = ir3_MOV(b, src[0], TYPE_U32);
} else {
/* otherwise generate a max.f that saturates; the blob does
* something similar (generating a cat2 mov using max.f)
*/
dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0);
dst[0]->flags |= IR3_INSTR_SAT;
}
break;
case nir_op_fmul:
dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0);
break;

View File

@ -37,7 +37,6 @@
static const nir_shader_compiler_options options = {
.lower_fpow = true,
.lower_fsat = true,
.lower_scmp = true,
.lower_flrp32 = true,
.lower_flrp64 = true,