i965/fs: Allow LRPs with uniform registers.
Improves GLB2.7 performance on my HSW by 0.671455% +/- 0.225037% (n=62).

v2: Make is_valid_3src() a method of fs_reg (recommended by Ken).

Reviewed-by: Matt Turner <mattst88@gmail.com> (v1)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
This commit is contained in:
parent
de7e8b1d01
commit
61ca2c4f73
|
@ -444,6 +444,12 @@ fs_reg::is_one() const
|
|||
return type == BRW_REGISTER_TYPE_F ? imm.f == 1.0 : imm.i == 1;
|
||||
}
|
||||
|
||||
/**
 * Returns true when this register's file is GRF or UNIFORM.
 *
 * fs_visitor::emit_lrp() calls this on each of its x, y and a operands and
 * falls back to emitting x*(1-a) + y*a when any of them fails the check.
 * NOTE(review): the "3src" name suggests the same rule applies to other
 * three-source instructions -- confirm before reusing it elsewhere.
 */
bool
|
||||
fs_reg::is_valid_3src() const
|
||||
{
|
||||
return file == GRF || file == UNIFORM;
|
||||
}
|
||||
|
||||
int
|
||||
fs_visitor::type_size(const struct glsl_type *type)
|
||||
{
|
||||
|
|
|
@ -93,6 +93,7 @@ public:
|
|||
bool equals(const fs_reg &r) const;
|
||||
bool is_zero() const;
|
||||
bool is_one() const;
|
||||
bool is_valid_3src() const;
|
||||
|
||||
/** Register file, e.g. ARF, GRF, MRF, IMM or UNIFORM. */
|
||||
enum register_file file;
|
||||
|
|
|
@ -201,7 +201,10 @@ fs_visitor::visit(ir_dereference_array *ir)
|
|||
void
|
||||
fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
|
||||
{
|
||||
if (intel->gen < 6 || x.file != GRF || y.file != GRF || a.file != GRF) {
|
||||
if (intel->gen < 6 ||
|
||||
!x.is_valid_3src() ||
|
||||
!y.is_valid_3src() ||
|
||||
!a.is_valid_3src()) {
|
||||
/* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
|
||||
fs_reg y_times_a = fs_reg(this, glsl_type::float_type);
|
||||
fs_reg one_minus_a = fs_reg(this, glsl_type::float_type);
|
||||
|
|
Loading…
Reference in New Issue