glsl: Convert mix() to use a new ir_triop_lrp opcode.

Many GPUs have an instruction to do linear interpolation which is more
efficient than simply performing the algebra necessary (two multiplies,
an add, and a subtract).

Pattern matching or peepholing the expanded arithmetic into such an
instruction is more desirable, but can be tricky.  By using an opcode,
we can at least make shaders which use the mix() built-in get the more
efficient behavior.

Currently, all consumers lower ir_triop_lrp.  Subsequent patches will
actually generate different code.

v2 [mattst88]:
   - Add LRP_TO_ARITH flag to ir_to_mesa.cpp. Will be removed in a
     subsequent patch and ir_triop_lrp translated directly.
v3 [mattst88]:
   - Move changes from the next patch to opt_algebraic.cpp to accept
     3-src operations.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Kenneth Graunke 2012-12-01 23:49:26 -08:00 committed by Matt Turner
parent 18281d6088
commit 93066ce129
11 changed files with 84 additions and 12 deletions

View File

@ -4,49 +4,49 @@
(declare (in) float arg0) (declare (in) float arg0)
(declare (in) float arg1) (declare (in) float arg1)
(declare (in) float arg2)) (declare (in) float arg2))
((return (expression float + (expression float * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression float * (var_ref arg1) (var_ref arg2)))))) ((return (expression float lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
(signature vec2 (signature vec2
(parameters (parameters
(declare (in) vec2 arg0) (declare (in) vec2 arg0)
(declare (in) vec2 arg1) (declare (in) vec2 arg1)
(declare (in) vec2 arg2)) (declare (in) vec2 arg2))
((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression vec2 - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2)))))) ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
(signature vec3 (signature vec3
(parameters (parameters
(declare (in) vec3 arg0) (declare (in) vec3 arg0)
(declare (in) vec3 arg1) (declare (in) vec3 arg1)
(declare (in) vec3 arg2)) (declare (in) vec3 arg2))
((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression vec3 - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2)))))) ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
(signature vec4 (signature vec4
(parameters (parameters
(declare (in) vec4 arg0) (declare (in) vec4 arg0)
(declare (in) vec4 arg1) (declare (in) vec4 arg1)
(declare (in) vec4 arg2)) (declare (in) vec4 arg2))
((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression vec4 - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2)))))) ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
(signature vec2 (signature vec2
(parameters (parameters
(declare (in) vec2 arg0) (declare (in) vec2 arg0)
(declare (in) vec2 arg1) (declare (in) vec2 arg1)
(declare (in) float arg2)) (declare (in) float arg2))
((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2)))))) ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
(signature vec3 (signature vec3
(parameters (parameters
(declare (in) vec3 arg0) (declare (in) vec3 arg0)
(declare (in) vec3 arg1) (declare (in) vec3 arg1)
(declare (in) float arg2)) (declare (in) float arg2))
((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2)))))) ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
(signature vec4 (signature vec4
(parameters (parameters
(declare (in) vec4 arg0) (declare (in) vec4 arg0)
(declare (in) vec4 arg1) (declare (in) vec4 arg1)
(declare (in) float arg2)) (declare (in) float arg2))
((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2)))))) ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
(signature float (signature float
(parameters (parameters

View File

@ -416,6 +416,9 @@ ir_expression::get_num_operands(ir_expression_operation op)
if (op <= ir_last_binop) if (op <= ir_last_binop)
return 2; return 2;
if (op <= ir_last_triop)
return 3;
if (op == ir_quadop_vector) if (op == ir_quadop_vector)
return 4; return 4;
@ -502,6 +505,7 @@ static const char *const operator_strs[] = {
"pow", "pow",
"packHalf2x16_split", "packHalf2x16_split",
"ubo_load", "ubo_load",
"lrp",
"vector", "vector",
}; };

View File

@ -1118,6 +1118,13 @@ enum ir_expression_operation {
*/ */
ir_last_binop = ir_binop_ubo_load, ir_last_binop = ir_binop_ubo_load,
ir_triop_lrp,
/**
* A sentinel marking the last of the ternary operations.
*/
ir_last_triop = ir_triop_lrp,
ir_quadop_vector, ir_quadop_vector,
/** /**

View File

@ -1248,6 +1248,19 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
} }
break; break;
case ir_triop_lrp: {
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
(op[1]->value.f[c] * op[2]->value.f[c2]);
}
break;
}
case ir_quadop_vector: case ir_quadop_vector:
for (unsigned c = 0; c < this->type->vector_elements; c++) { for (unsigned c = 0; c < this->type->vector_elements; c++) {
switch (this->type->base_type) { switch (this->type->base_type) {

View File

@ -36,6 +36,7 @@
#define LOG_TO_LOG2 0x10 #define LOG_TO_LOG2 0x10
#define MOD_TO_FRACT 0x20 #define MOD_TO_FRACT 0x20
#define INT_DIV_TO_MUL_RCP 0x40 #define INT_DIV_TO_MUL_RCP 0x40
#define LRP_TO_ARITH 0x80
/** /**
* \see class lower_packing_builtins_visitor * \see class lower_packing_builtins_visitor

View File

@ -468,6 +468,12 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[1]->type == glsl_type::uint_type); assert(ir->operands[1]->type == glsl_type::uint_type);
break; break;
case ir_triop_lrp:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
assert(ir->operands[0]->type == ir->operands[1]->type);
assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type);
break;
case ir_quadop_vector: case ir_quadop_vector:
/* The vector operator collects some number of scalars and generates a /* The vector operator collects some number of scalars and generates a
* vector from them. * vector from them.

View File

@ -37,6 +37,7 @@
* - POW_TO_EXP2 * - POW_TO_EXP2
* - LOG_TO_LOG2 * - LOG_TO_LOG2
* - MOD_TO_FRACT * - MOD_TO_FRACT
* - LRP_TO_ARITH
* *
* SUB_TO_ADD_NEG: * SUB_TO_ADD_NEG:
* --------------- * ---------------
@ -79,13 +80,20 @@
* Many GPUs don't have a MOD instruction (945 and 965 included), and * Many GPUs don't have a MOD instruction (945 and 965 included), and
* if we have to break it down like this anyway, it gives an * if we have to break it down like this anyway, it gives an
* opportunity to do things like constant fold the (1.0 / op1) easily. * opportunity to do things like constant fold the (1.0 / op1) easily.
*
* LRP_TO_ARITH:
* -------------
* Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
*/ */
#include "main/core.h" /* for M_LOG2E */ #include "main/core.h" /* for M_LOG2E */
#include "glsl_types.h" #include "glsl_types.h"
#include "ir.h" #include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h" #include "ir_optimization.h"
using namespace ir_builder;
class lower_instructions_visitor : public ir_hierarchical_visitor { class lower_instructions_visitor : public ir_hierarchical_visitor {
public: public:
lower_instructions_visitor(unsigned lower) lower_instructions_visitor(unsigned lower)
@ -105,6 +113,7 @@ private:
void exp_to_exp2(ir_expression *); void exp_to_exp2(ir_expression *);
void pow_to_exp2(ir_expression *); void pow_to_exp2(ir_expression *);
void log_to_log2(ir_expression *); void log_to_log2(ir_expression *);
void lrp_to_arith(ir_expression *);
}; };
/** /**
@ -268,6 +277,27 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
this->progress = true; this->progress = true;
} }
/**
 * Rewrite an ir_triop_lrp expression in place as ordinary arithmetic:
 *
 *    (lrp x y a)  ->  x * (1 - a) + y * a
 *
 * The interpolation factor (operand 2) appears twice in the lowered form,
 * so it is first copied into a new temporary that is declared and assigned
 * ahead of this->base_ir.  The expression node itself then becomes the
 * outer addition and its third operand slot is cleared.
 *
 * \param ir  the lrp expression to lower; modified in place.
 */
void
lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
{
   /* Capture the factor once so both products read the same value. */
   ir_variable *factor =
      new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
                          ir_var_temporary);
   this->base_ir->insert_before(factor);
   this->base_ir->insert_before(assign(factor, ir->operands[2]));

   /* Constant 1.0 used to form the (1 - a) weight applied to operand 0. */
   ir_constant *unity = new(ir) ir_constant(1.0f);

   /* Reuse the node as the binary add of the two weighted operands. */
   ir->operation = ir_binop_add;
   ir->operands[0] = mul(ir->operands[0], sub(unity, factor));
   ir->operands[1] = mul(ir->operands[1], factor);
   ir->operands[2] = NULL;

   this->progress = true;
}
ir_visitor_status ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir) lower_instructions_visitor::visit_leave(ir_expression *ir)
{ {
@ -304,6 +334,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
pow_to_exp2(ir); pow_to_exp2(ir);
break; break;
case ir_triop_lrp:
if (lowering(LRP_TO_ARITH))
lrp_to_arith(ir);
break;
default: default:
return visit_continue; return visit_continue;
} }

View File

@ -186,12 +186,12 @@ ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
ir_rvalue * ir_rvalue *
ir_algebraic_visitor::handle_expression(ir_expression *ir) ir_algebraic_visitor::handle_expression(ir_expression *ir)
{ {
ir_constant *op_const[2] = {NULL, NULL}; ir_constant *op_const[3] = {NULL, NULL, NULL};
ir_expression *op_expr[2] = {NULL, NULL}; ir_expression *op_expr[3] = {NULL, NULL, NULL};
ir_expression *temp; ir_expression *temp;
unsigned int i; unsigned int i;
assert(ir->get_num_operands() <= 2); assert(ir->get_num_operands() <= 3);
for (i = 0; i < ir->get_num_operands(); i++) { for (i = 0; i < ir->get_num_operands(); i++) {
if (ir->operands[i]->type->is_matrix()) if (ir->operands[i]->type->is_matrix())
return ir; return ir;

View File

@ -155,7 +155,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
DIV_TO_MUL_RCP | DIV_TO_MUL_RCP |
SUB_TO_ADD_NEG | SUB_TO_ADD_NEG |
EXP_TO_EXP2 | EXP_TO_EXP2 |
LOG_TO_LOG2); LOG_TO_LOG2 |
LRP_TO_ARITH);
/* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
* if-statements need to be flattened. * if-statements need to be flattened.

View File

@ -1478,6 +1478,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
assert(!"not supported"); assert(!"not supported");
break; break;
case ir_triop_lrp:
assert(!"ir_triop_lrp should have been lowered.");
break;
case ir_quadop_vector: case ir_quadop_vector:
/* This operation should have already been handled. /* This operation should have already been handled.
*/ */
@ -2993,7 +2997,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
/* Lowering */ /* Lowering */
do_mat_op_to_vec(ir); do_mat_op_to_vec(ir);
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | LRP_TO_ARITH
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;

View File

@ -5189,6 +5189,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
DIV_TO_MUL_RCP | DIV_TO_MUL_RCP |
EXP_TO_EXP2 | EXP_TO_EXP2 |
LOG_TO_LOG2 | LOG_TO_LOG2 |
LRP_TO_ARITH |
(options->EmitNoPow ? POW_TO_EXP2 : 0) | (options->EmitNoPow ? POW_TO_EXP2 : 0) |
(!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0)); (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));