glsl/lower_if: conditionally lower if-branches based on their size

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2016-07-03 17:11:07 +02:00
parent 83d9b8a6f6
commit e33440070a
2 changed files with 50 additions and 7 deletions

View File

@ -109,7 +109,7 @@ bool do_if_simplification(exec_list *instructions);
bool opt_flatten_nested_if_blocks(exec_list *instructions); bool opt_flatten_nested_if_blocks(exec_list *instructions);
bool do_discard_simplification(exec_list *instructions); bool do_discard_simplification(exec_list *instructions);
bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions, bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
unsigned max_depth = 0); unsigned max_depth = 0, unsigned min_branch_cost = 0);
bool do_mat_op_to_vec(exec_list *instructions); bool do_mat_op_to_vec(exec_list *instructions);
bool do_minmax_prune(exec_list *instructions); bool do_minmax_prune(exec_list *instructions);
bool do_noop_swizzle(exec_list *instructions); bool do_noop_swizzle(exec_list *instructions);

View File

@ -24,8 +24,14 @@
/** /**
* \file lower_if_to_cond_assign.cpp * \file lower_if_to_cond_assign.cpp
* *
* This attempts to flatten if-statements to conditional assignments for * This flattens if-statements to conditional assignments if:
* GPUs with limited or no flow control support. *
* - the GPU has limited or no flow control support
* (controlled by max_depth)
*
* - small conditional branches are more expensive than conditional assignments
* (controlled by min_branch_cost: the minimum cost a branch must reach to
* be preserved; cheaper branches are flattened)
* *
* It can't handle other control flow being inside of its block, such * It can't handle other control flow being inside of its block, such
* as calls or loops. Hopefully loop unrolling and inlining will take * as calls or loops. Hopefully loop unrolling and inlining will take
@ -49,17 +55,20 @@
#include "ir.h" #include "ir.h"
#include "util/set.h" #include "util/set.h"
#include "util/hash_table.h" /* Needed for the hashing functions */ #include "util/hash_table.h" /* Needed for the hashing functions */
#include "main/macros.h" /* for MAX2 */
namespace { namespace {
class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
public: public:
ir_if_to_cond_assign_visitor(gl_shader_stage stage, ir_if_to_cond_assign_visitor(gl_shader_stage stage,
unsigned max_depth) unsigned max_depth,
unsigned min_branch_cost)
{ {
this->progress = false; this->progress = false;
this->stage = stage; this->stage = stage;
this->max_depth = max_depth; this->max_depth = max_depth;
this->min_branch_cost = min_branch_cost;
this->depth = 0; this->depth = 0;
this->condition_variables = this->condition_variables =
@ -76,8 +85,13 @@ public:
ir_visitor_status visit_leave(ir_if *); ir_visitor_status visit_leave(ir_if *);
bool found_unsupported_op; bool found_unsupported_op;
bool found_expensive_op;
bool is_then;
bool progress; bool progress;
gl_shader_stage stage; gl_shader_stage stage;
unsigned then_cost;
unsigned else_cost;
unsigned min_branch_cost;
unsigned max_depth; unsigned max_depth;
unsigned depth; unsigned depth;
@ -88,12 +102,12 @@ public:
bool bool
lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions, lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
unsigned max_depth) unsigned max_depth, unsigned min_branch_cost)
{ {
if (max_depth == UINT_MAX) if (max_depth == UINT_MAX)
return false; return false;
ir_if_to_cond_assign_visitor v(stage, max_depth); ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
visit_list_elements(&v, instructions); visit_list_elements(&v, instructions);
@ -129,6 +143,20 @@ check_ir_node(ir_instruction *ir, void *data)
break; break;
} }
/* SSBO, images, atomic counters are handled by ir_type_call */
case ir_type_texture:
v->found_expensive_op = true;
break;
case ir_type_expression:
case ir_type_dereference_array:
case ir_type_dereference_record:
if (v->is_then)
v->then_cost++;
else
v->else_cost++;
break;
default: default:
break; break;
} }
@ -193,24 +221,39 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
ir_visitor_status ir_visitor_status
ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
{ {
bool must_lower = this->depth-- > this->max_depth;
/* Only flatten when beyond the GPU's maximum supported nesting depth. */ /* Only flatten when beyond the GPU's maximum supported nesting depth. */
if (this->depth-- <= this->max_depth) if (!must_lower && this->min_branch_cost == 0)
return visit_continue; return visit_continue;
this->found_unsupported_op = false; this->found_unsupported_op = false;
this->found_expensive_op = false;
this->then_cost = 0;
this->else_cost = 0;
ir_assignment *assign; ir_assignment *assign;
/* Check that both blocks don't contain anything we can't support. */ /* Check that both blocks don't contain anything we can't support. */
this->is_then = true;
foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) { foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
visit_tree(then_ir, check_ir_node, this); visit_tree(then_ir, check_ir_node, this);
} }
this->is_then = false;
foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) { foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
visit_tree(else_ir, check_ir_node, this); visit_tree(else_ir, check_ir_node, this);
} }
if (this->found_unsupported_op) if (this->found_unsupported_op)
return visit_continue; /* can't handle inner unsupported opcodes */ return visit_continue; /* can't handle inner unsupported opcodes */
/* Skip if the branch cost is high enough or if there's an expensive op. */
if (!must_lower &&
(this->found_expensive_op ||
MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
return visit_continue;
void *mem_ctx = ralloc_parent(ir); void *mem_ctx = ralloc_parent(ir);
/* Store the condition to a variable. Move all of the instructions from /* Store the condition to a variable. Move all of the instructions from