i965/vec4: Change vec4_visitor::emit_lrp to use MAC for gen<6
This allows us to emit ADD/MUL/MAC instead of MUL/ADD/MUL/ADD, saving one instruction and two temporary registers.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
This commit is contained in:
parent
0974706671
commit
2dfbbeca50
|
@ -1165,24 +1165,14 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
|
|||
} else {
|
||||
/* Earlier generations don't support three source operations, so we
|
||||
* need to emit x*(1-a) + y*a.
|
||||
*
|
||||
* A better way to do this would be:
|
||||
* ADD one_minus_a, negate(a), 1.0f
|
||||
* MUL null, y, a
|
||||
* MAC dst, x, one_minus_a
|
||||
* but we would need to support MAC and implicit accumulator.
|
||||
*/
|
||||
dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
|
||||
dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
|
||||
dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
|
||||
y_times_a.writemask = dst.writemask;
|
||||
one_minus_a.writemask = dst.writemask;
|
||||
x_times_one_minus_a.writemask = dst.writemask;
|
||||
dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
|
||||
one_minus_a.writemask = dst.writemask;
|
||||
|
||||
emit(MUL(y_times_a, y, a));
|
||||
emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
|
||||
emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
|
||||
emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
|
||||
vec4_instruction *mul = emit(MUL(dst_null_f(), y, a));
|
||||
mul->writes_accumulator = true;
|
||||
emit(MAC(dst, x, src_reg(one_minus_a)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue