From 93066ce1299a7be8f670e527f249940c635605b4 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 1 Dec 2012 23:49:26 -0800 Subject: [PATCH] glsl: Convert mix() to use a new ir_triop_lrp opcode. Many GPUs have an instruction to do linear interpolation which is more efficient than simply performing the algebra necessary (two multiplies, an add, and a subtract). Pattern matching or peepholing this is more desirable, but can be tricky. By using an opcode, we can at least make shaders which use the mix() built-in get the more efficient behavior. Currently, all consumers lower ir_triop_lrp. Subsequent patches will actually generate different code. v2 [mattst88]: - Add LRP_TO_ARITH flag to ir_to_mesa.cpp. Will be removed in a subsequent patch and ir_triop_lrp translated directly. v3 [mattst88]: - Move changes from the next patch to opt_algebraic.cpp to accept 3-src operations. Reviewed-by: Matt Turner Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/glsl/builtins/ir/mix.ir | 14 ++++----- src/glsl/ir.cpp | 4 +++ src/glsl/ir.h | 7 +++++ src/glsl/ir_constant_expression.cpp | 13 ++++++++ src/glsl/ir_optimization.h | 1 + src/glsl/ir_validate.cpp | 6 ++++ src/glsl/lower_instructions.cpp | 35 ++++++++++++++++++++++ src/glsl/opt_algebraic.cpp | 6 ++-- src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +- src/mesa/program/ir_to_mesa.cpp | 6 +++- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 + 11 files changed, 84 insertions(+), 12 deletions(-) diff --git a/src/glsl/builtins/ir/mix.ir b/src/glsl/builtins/ir/mix.ir index 70ae13c9f3f..e6665321143 100644 --- a/src/glsl/builtins/ir/mix.ir +++ b/src/glsl/builtins/ir/mix.ir @@ -4,49 +4,49 @@ (declare (in) float arg0) (declare (in) float arg1) (declare (in) float arg2)) - ((return (expression float + (expression float * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression float * (var_ref arg1) (var_ref arg2)))))) + ((return (expression float lrp (var_ref arg0) (var_ref arg1) (var_ref arg2))))) (signature vec2 (parameters (declare (in) vec2 arg0) (declare (in) vec2 arg1) (declare (in) vec2 arg2)) - ((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression vec2 - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2)))))) + ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2))))) (signature vec3 (parameters (declare (in) vec3 arg0) (declare (in) vec3 arg1) (declare (in) vec3 arg2)) - ((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression vec3 - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2)))))) + ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2))))) (signature vec4 (parameters (declare (in) vec4 arg0) (declare (in) vec4 arg1) (declare (in) vec4 arg2)) - ((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression vec4 - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2)))))) + ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2))))) (signature vec2 (parameters (declare (in) vec2 arg0) (declare (in) vec2 arg1) (declare (in) float arg2)) - ((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2)))))) + ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2))))) (signature vec3 (parameters (declare (in) vec3 arg0) (declare (in) vec3 arg1) (declare (in) float arg2)) - ((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2)))))) + ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2))))) (signature vec4 (parameters (declare (in) vec4 arg0) (declare (in) vec4 arg1) (declare (in) float arg2)) - ((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2)))))) + ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2))))) (signature float (parameters diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 4ccdc42dce0..717d6f610ef 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -416,6 +416,9 @@ ir_expression::get_num_operands(ir_expression_operation op) if (op <= ir_last_binop) return 2; + if (op <= ir_last_triop) + return 3; + if (op == ir_quadop_vector) return 4; @@ -502,6 +505,7 @@ static const char *const operator_strs[] = { "pow", "packHalf2x16_split", "ubo_load", + "lrp", "vector", }; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index d878bd8fd03..d63dac100c7 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1118,6 +1118,13 @@ enum ir_expression_operation { */ ir_last_binop = ir_binop_ubo_load, + ir_triop_lrp, + + /** + * A sentinel marking the last of the ternary operations. + */ + ir_last_triop = ir_triop_lrp, + ir_quadop_vector, /** diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index 86b863f3103..c2d0dc46c78 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -1248,6 +1248,19 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) } break; + case ir_triop_lrp: { + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(op[1]->type->base_type == GLSL_TYPE_FLOAT); + assert(op[2]->type->base_type == GLSL_TYPE_FLOAT); + + unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1; + for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) { + data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) + + (op[1]->value.f[c] * op[2]->value.f[c2]); + } + break; + } + case ir_quadop_vector: for (unsigned c = 0; c < this->type->vector_elements; c++) { switch (this->type->base_type) { diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 8f33018404e..2454bbe6f66 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -36,6 +36,7 @@ #define LOG_TO_LOG2 0x10 #define MOD_TO_FRACT 0x20 #define INT_DIV_TO_MUL_RCP 0x40 +#define LRP_TO_ARITH 0x80 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index d8cafd55f07..24ea506dca3 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -468,6 +468,12 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[1]->type == glsl_type::uint_type); break; + case ir_triop_lrp: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->operands[0]->type == ir->operands[1]->type); + assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type); + break; + case ir_quadop_vector: /* The vector operator collects some number of scalars and generates a * vector from them. diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index a8ef7654e66..1ce7b7c9df4 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -37,6 +37,7 @@ * - POW_TO_EXP2 * - LOG_TO_LOG2 * - MOD_TO_FRACT + * - LRP_TO_ARITH * * SUB_TO_ADD_NEG: * --------------- @@ -79,13 +80,20 @@ * Many GPUs don't have a MOD instruction (945 and 965 included), and * if we have to break it down like this anyway, it gives an * opportunity to do things like constant fold the (1.0 / op1) easily. + * + * LRP_TO_ARITH: + * ------------- + * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2). */ #include "main/core.h" /* for M_LOG2E */ #include "glsl_types.h" #include "ir.h" +#include "ir_builder.h" #include "ir_optimization.h" +using namespace ir_builder; + class lower_instructions_visitor : public ir_hierarchical_visitor { public: lower_instructions_visitor(unsigned lower) @@ -105,6 +113,7 @@ private: void exp_to_exp2(ir_expression *); void pow_to_exp2(ir_expression *); void log_to_log2(ir_expression *); + void lrp_to_arith(ir_expression *); }; /** @@ -268,6 +277,27 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::lrp_to_arith(ir_expression *ir) +{ + /* (lrp x y a) -> x*(1-a) + y*a */ + + /* Save op2 */ + ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor", + ir_var_temporary); + this->base_ir->insert_before(temp); + this->base_ir->insert_before(assign(temp, ir->operands[2])); + + ir_constant *one = new(ir) ir_constant(1.0f); + + ir->operation = ir_binop_add; + ir->operands[0] = mul(ir->operands[0], sub(one, temp)); + ir->operands[1] = mul(ir->operands[1], temp); + ir->operands[2] = NULL; + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -304,6 +334,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) pow_to_exp2(ir); break; + case ir_triop_lrp: + if (lowering(LRP_TO_ARITH)) + lrp_to_arith(ir); + break; + default: return visit_continue; } diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 75948db16fa..44a21b6c335 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -186,12 +186,12 @@ ir_algebraic_visitor::swizzle_if_required(ir_expression *expr, ir_rvalue * ir_algebraic_visitor::handle_expression(ir_expression *ir) { - ir_constant *op_const[2] = {NULL, NULL}; - ir_expression *op_expr[2] = {NULL, NULL}; + ir_constant *op_const[3] = {NULL, NULL, NULL}; + ir_expression *op_expr[3] = {NULL, NULL, NULL}; ir_expression *temp; unsigned int i; - assert(ir->get_num_operands() <= 2); + assert(ir->get_num_operands() <= 3); for (i = 0; i < ir->get_num_operands(); i++) { if (ir->operands[i]->type->is_matrix()) return ir; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index c71715ee512..9ab18cc7d63 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -155,7 +155,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) DIV_TO_MUL_RCP | SUB_TO_ADD_NEG | EXP_TO_EXP2 | - LOG_TO_LOG2); + LOG_TO_LOG2 | + LRP_TO_ARITH); /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, * if-statements need to be flattened. diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index ce409eca9d1..30305d2e77d 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1478,6 +1478,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir) assert(!"not supported"); break; + case ir_triop_lrp: + assert(!"ir_triop_lrp should have been lowered."); + break; + case ir_quadop_vector: /* This operation should have already been handled. */ @@ -2993,7 +2997,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP + | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | LRP_TO_ARITH | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 63b74285aa3..757bd71aaba 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5189,6 +5189,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | + LRP_TO_ARITH | (options->EmitNoPow ? POW_TO_EXP2 : 0) | (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));