From 61ca2c4f73f84eec29454698188309ab311eb503 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 25 Apr 2013 20:20:05 -0700
Subject: [PATCH] i965/fs: Allow LRPs with uniform registers.

Improves GLB2.7 performance on my HSW by 0.671455% +/- 0.225037% (n=62).

v2: Make is_valid_3src() a method of the fs_reg. (recommended by Ken)

Reviewed-by: Matt Turner (v1)
Reviewed-by: Kenneth Graunke (v1)
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++
 src/mesa/drivers/dri/i965/brw_fs.h | 1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 ++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b45035e80f0..a8610eea92a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -444,6 +444,12 @@ fs_reg::is_one() const
    return type == BRW_REGISTER_TYPE_F ? imm.f == 1.0 : imm.i == 1;
 }
 
+bool
+fs_reg::is_valid_3src() const
+{
+   return file == GRF || file == UNIFORM;
+}
+
 int
 fs_visitor::type_size(const struct glsl_type *type)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index efe90f4ae01..c9c9856748d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -93,6 +93,7 @@ public:
    bool equals(const fs_reg &r) const;
    bool is_zero() const;
    bool is_one() const;
+   bool is_valid_3src() const;
 
    /** Register file: ARF, GRF, MRF, IMM. */
    enum register_file file;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f1539d5c3a7..55ae6898866 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -201,7 +201,10 @@ fs_visitor::visit(ir_dereference_array *ir)
 void
 fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
 {
-   if (intel->gen < 6 || x.file != GRF || y.file != GRF || a.file != GRF) {
+   if (intel->gen < 6 ||
+       !x.is_valid_3src() ||
+       !y.is_valid_3src() ||
+       !a.is_valid_3src()) {
       /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
       fs_reg y_times_a = fs_reg(this, glsl_type::float_type);
       fs_reg one_minus_a = fs_reg(this, glsl_type::float_type);