glsl: Convert mix() to use a new ir_triop_lrp opcode.
Many GPUs have an instruction to do linear interpolation which is more efficient than simply performing the algebra necessary (two multiplies, an add, and a subtract). Pattern matching or peepholing this is more desirable, but can be tricky. By using an opcode, we can at least make shaders which use the mix() built-in get the more efficient behavior. Currently, all consumers lower ir_triop_lrp. Subsequent patches will actually generate different code. v2 [mattst88]: - Add LRP_TO_ARITH flag to ir_to_mesa.cpp. Will be removed in a subsequent patch and ir_triop_lrp translated directly. v3 [mattst88]: - Move changes from the next patch to opt_algebraic.cpp to accept 3-src operations. Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Eric Anholt <eric@anholt.net> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
18281d6088
commit
93066ce129
|
@ -4,49 +4,49 @@
|
|||
(declare (in) float arg0)
|
||||
(declare (in) float arg1)
|
||||
(declare (in) float arg2))
|
||||
((return (expression float + (expression float * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression float * (var_ref arg1) (var_ref arg2))))))
|
||||
((return (expression float lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
|
||||
|
||||
(signature vec2
|
||||
(parameters
|
||||
(declare (in) vec2 arg0)
|
||||
(declare (in) vec2 arg1)
|
||||
(declare (in) vec2 arg2))
|
||||
((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression vec2 - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2))))))
|
||||
((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
|
||||
|
||||
(signature vec3
|
||||
(parameters
|
||||
(declare (in) vec3 arg0)
|
||||
(declare (in) vec3 arg1)
|
||||
(declare (in) vec3 arg2))
|
||||
((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression vec3 - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2))))))
|
||||
((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
|
||||
|
||||
(signature vec4
|
||||
(parameters
|
||||
(declare (in) vec4 arg0)
|
||||
(declare (in) vec4 arg1)
|
||||
(declare (in) vec4 arg2))
|
||||
((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression vec4 - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2))))))
|
||||
((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
|
||||
|
||||
(signature vec2
|
||||
(parameters
|
||||
(declare (in) vec2 arg0)
|
||||
(declare (in) vec2 arg1)
|
||||
(declare (in) float arg2))
|
||||
((return (expression vec2 + (expression vec2 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec2 * (var_ref arg1) (var_ref arg2))))))
|
||||
((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
|
||||
|
||||
(signature vec3
|
||||
(parameters
|
||||
(declare (in) vec3 arg0)
|
||||
(declare (in) vec3 arg1)
|
||||
(declare (in) float arg2))
|
||||
((return (expression vec3 + (expression vec3 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec3 * (var_ref arg1) (var_ref arg2))))))
|
||||
((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
|
||||
|
||||
(signature vec4
|
||||
(parameters
|
||||
(declare (in) vec4 arg0)
|
||||
(declare (in) vec4 arg1)
|
||||
(declare (in) float arg2))
|
||||
((return (expression vec4 + (expression vec4 * (var_ref arg0) (expression float - (constant float (1.000000)) (var_ref arg2))) (expression vec4 * (var_ref arg1) (var_ref arg2))))))
|
||||
((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref arg2)))))
|
||||
|
||||
(signature float
|
||||
(parameters
|
||||
|
|
|
@ -416,6 +416,9 @@ ir_expression::get_num_operands(ir_expression_operation op)
|
|||
if (op <= ir_last_binop)
|
||||
return 2;
|
||||
|
||||
if (op <= ir_last_triop)
|
||||
return 3;
|
||||
|
||||
if (op == ir_quadop_vector)
|
||||
return 4;
|
||||
|
||||
|
@ -502,6 +505,7 @@ static const char *const operator_strs[] = {
|
|||
"pow",
|
||||
"packHalf2x16_split",
|
||||
"ubo_load",
|
||||
"lrp",
|
||||
"vector",
|
||||
};
|
||||
|
||||
|
|
|
@ -1118,6 +1118,13 @@ enum ir_expression_operation {
|
|||
*/
|
||||
ir_last_binop = ir_binop_ubo_load,
|
||||
|
||||
ir_triop_lrp,
|
||||
|
||||
/**
|
||||
* A sentinel marking the last of the ternary operations.
|
||||
*/
|
||||
ir_last_triop = ir_triop_lrp,
|
||||
|
||||
ir_quadop_vector,
|
||||
|
||||
/**
|
||||
|
|
|
@ -1248,6 +1248,19 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
|
|||
}
|
||||
break;
|
||||
|
||||
case ir_triop_lrp: {
|
||||
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
|
||||
assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
|
||||
assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
|
||||
|
||||
unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
|
||||
for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
|
||||
data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
|
||||
(op[1]->value.f[c] * op[2]->value.f[c2]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case ir_quadop_vector:
|
||||
for (unsigned c = 0; c < this->type->vector_elements; c++) {
|
||||
switch (this->type->base_type) {
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#define LOG_TO_LOG2 0x10
|
||||
#define MOD_TO_FRACT 0x20
|
||||
#define INT_DIV_TO_MUL_RCP 0x40
|
||||
#define LRP_TO_ARITH 0x80
|
||||
|
||||
/**
|
||||
* \see class lower_packing_builtins_visitor
|
||||
|
|
|
@ -468,6 +468,12 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||
assert(ir->operands[1]->type == glsl_type::uint_type);
|
||||
break;
|
||||
|
||||
case ir_triop_lrp:
|
||||
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
|
||||
assert(ir->operands[0]->type == ir->operands[1]->type);
|
||||
assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type);
|
||||
break;
|
||||
|
||||
case ir_quadop_vector:
|
||||
/* The vector operator collects some number of scalars and generates a
|
||||
* vector from them.
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
* - POW_TO_EXP2
|
||||
* - LOG_TO_LOG2
|
||||
* - MOD_TO_FRACT
|
||||
* - LRP_TO_ARITH
|
||||
*
|
||||
* SUB_TO_ADD_NEG:
|
||||
* ---------------
|
||||
|
@ -79,13 +80,20 @@
|
|||
* Many GPUs don't have a MOD instruction (945 and 965 included), and
|
||||
* if we have to break it down like this anyway, it gives an
|
||||
* opportunity to do things like constant fold the (1.0 / op1) easily.
|
||||
*
|
||||
* LRP_TO_ARITH:
|
||||
* -------------
|
||||
* Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
|
||||
*/
|
||||
|
||||
#include "main/core.h" /* for M_LOG2E */
|
||||
#include "glsl_types.h"
|
||||
#include "ir.h"
|
||||
#include "ir_builder.h"
|
||||
#include "ir_optimization.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
class lower_instructions_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
lower_instructions_visitor(unsigned lower)
|
||||
|
@ -105,6 +113,7 @@ private:
|
|||
void exp_to_exp2(ir_expression *);
|
||||
void pow_to_exp2(ir_expression *);
|
||||
void log_to_log2(ir_expression *);
|
||||
void lrp_to_arith(ir_expression *);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -268,6 +277,27 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
|
|||
this->progress = true;
|
||||
}
|
||||
|
||||
void
|
||||
lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
|
||||
{
|
||||
/* (lrp x y a) -> x*(1-a) + y*a */
|
||||
|
||||
/* Save op2 */
|
||||
ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
|
||||
ir_var_temporary);
|
||||
this->base_ir->insert_before(temp);
|
||||
this->base_ir->insert_before(assign(temp, ir->operands[2]));
|
||||
|
||||
ir_constant *one = new(ir) ir_constant(1.0f);
|
||||
|
||||
ir->operation = ir_binop_add;
|
||||
ir->operands[0] = mul(ir->operands[0], sub(one, temp));
|
||||
ir->operands[1] = mul(ir->operands[1], temp);
|
||||
ir->operands[2] = NULL;
|
||||
|
||||
this->progress = true;
|
||||
}
|
||||
|
||||
ir_visitor_status
|
||||
lower_instructions_visitor::visit_leave(ir_expression *ir)
|
||||
{
|
||||
|
@ -304,6 +334,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
|
|||
pow_to_exp2(ir);
|
||||
break;
|
||||
|
||||
case ir_triop_lrp:
|
||||
if (lowering(LRP_TO_ARITH))
|
||||
lrp_to_arith(ir);
|
||||
break;
|
||||
|
||||
default:
|
||||
return visit_continue;
|
||||
}
|
||||
|
|
|
@ -186,12 +186,12 @@ ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
|
|||
ir_rvalue *
|
||||
ir_algebraic_visitor::handle_expression(ir_expression *ir)
|
||||
{
|
||||
ir_constant *op_const[2] = {NULL, NULL};
|
||||
ir_expression *op_expr[2] = {NULL, NULL};
|
||||
ir_constant *op_const[3] = {NULL, NULL, NULL};
|
||||
ir_expression *op_expr[3] = {NULL, NULL, NULL};
|
||||
ir_expression *temp;
|
||||
unsigned int i;
|
||||
|
||||
assert(ir->get_num_operands() <= 2);
|
||||
assert(ir->get_num_operands() <= 3);
|
||||
for (i = 0; i < ir->get_num_operands(); i++) {
|
||||
if (ir->operands[i]->type->is_matrix())
|
||||
return ir;
|
||||
|
|
|
@ -155,7 +155,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
|
|||
DIV_TO_MUL_RCP |
|
||||
SUB_TO_ADD_NEG |
|
||||
EXP_TO_EXP2 |
|
||||
LOG_TO_LOG2);
|
||||
LOG_TO_LOG2 |
|
||||
LRP_TO_ARITH);
|
||||
|
||||
/* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
|
||||
* if-statements need to be flattened.
|
||||
|
|
|
@ -1478,6 +1478,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
|
|||
assert(!"not supported");
|
||||
break;
|
||||
|
||||
case ir_triop_lrp:
|
||||
assert(!"ir_triop_lrp should have been lowered.");
|
||||
break;
|
||||
|
||||
case ir_quadop_vector:
|
||||
/* This operation should have already been handled.
|
||||
*/
|
||||
|
@ -2993,7 +2997,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
/* Lowering */
|
||||
do_mat_op_to_vec(ir);
|
||||
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
|
||||
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
|
||||
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | LRP_TO_ARITH
|
||||
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
|
||||
|
||||
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
|
||||
|
|
|
@ -5189,6 +5189,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
DIV_TO_MUL_RCP |
|
||||
EXP_TO_EXP2 |
|
||||
LOG_TO_LOG2 |
|
||||
LRP_TO_ARITH |
|
||||
(options->EmitNoPow ? POW_TO_EXP2 : 0) |
|
||||
(!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
|
||||
|
||||
|
|
Loading…
Reference in New Issue