glsl/lower_if: conditionally lower if-branches based on their size

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2016-07-03 17:11:07 +02:00
parent 83d9b8a6f6
commit e33440070a
2 changed files with 50 additions and 7 deletions

View File

@ -109,7 +109,7 @@ bool do_if_simplification(exec_list *instructions);
bool opt_flatten_nested_if_blocks(exec_list *instructions);
bool do_discard_simplification(exec_list *instructions);
bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
unsigned max_depth = 0);
unsigned max_depth = 0, unsigned min_branch_cost = 0);
bool do_mat_op_to_vec(exec_list *instructions);
bool do_minmax_prune(exec_list *instructions);
bool do_noop_swizzle(exec_list *instructions);

View File

@ -24,8 +24,14 @@
/**
* \file lower_if_to_cond_assign.cpp
*
* This attempts to flatten if-statements to conditional assignments for
* GPUs with limited or no flow control support.
* This flattens if-statements to conditional assignments if:
*
* - the GPU has limited or no flow control support
* (controlled by max_depth)
*
* - small conditional branches are more expensive than conditional assignments
* (controlled by min_branch_cost, that's the cost for a branch to be
* preserved)
*
* It can't handle other control flow being inside of its block, such
* as calls or loops. Hopefully loop unrolling and inlining will take
@ -49,17 +55,20 @@
#include "ir.h"
#include "util/set.h"
#include "util/hash_table.h" /* Needed for the hashing functions */
#include "main/macros.h" /* for MAX2 */
namespace {
class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
public:
ir_if_to_cond_assign_visitor(gl_shader_stage stage,
unsigned max_depth)
unsigned max_depth,
unsigned min_branch_cost)
{
this->progress = false;
this->stage = stage;
this->max_depth = max_depth;
this->min_branch_cost = min_branch_cost;
this->depth = 0;
this->condition_variables =
@ -76,8 +85,13 @@ public:
ir_visitor_status visit_leave(ir_if *);
bool found_unsupported_op;
bool found_expensive_op;
bool is_then;
bool progress;
gl_shader_stage stage;
unsigned then_cost;
unsigned else_cost;
unsigned min_branch_cost;
unsigned max_depth;
unsigned depth;
@ -88,12 +102,12 @@ public:
bool
lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
unsigned max_depth)
unsigned max_depth, unsigned min_branch_cost)
{
if (max_depth == UINT_MAX)
return false;
ir_if_to_cond_assign_visitor v(stage, max_depth);
ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
visit_list_elements(&v, instructions);
@ -129,6 +143,20 @@ check_ir_node(ir_instruction *ir, void *data)
break;
}
/* SSBO, images, atomic counters are handled by ir_type_call */
case ir_type_texture:
v->found_expensive_op = true;
break;
case ir_type_expression:
case ir_type_dereference_array:
case ir_type_dereference_record:
if (v->is_then)
v->then_cost++;
else
v->else_cost++;
break;
default:
break;
}
@ -193,24 +221,39 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
ir_visitor_status
ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
{
bool must_lower = this->depth-- > this->max_depth;
/* Only flatten when beyond the GPU's maximum supported nesting depth. */
if (this->depth-- <= this->max_depth)
if (!must_lower && this->min_branch_cost == 0)
return visit_continue;
this->found_unsupported_op = false;
this->found_expensive_op = false;
this->then_cost = 0;
this->else_cost = 0;
ir_assignment *assign;
/* Check that both blocks don't contain anything we can't support. */
this->is_then = true;
foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
visit_tree(then_ir, check_ir_node, this);
}
this->is_then = false;
foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
visit_tree(else_ir, check_ir_node, this);
}
if (this->found_unsupported_op)
return visit_continue; /* can't handle inner unsupported opcodes */
/* Skip if the branch cost is high enough or if there's an expensive op. */
if (!must_lower &&
(this->found_expensive_op ||
MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
return visit_continue;
void *mem_ctx = ralloc_parent(ir);
/* Store the condition to a variable. Move all of the instructions from