glsl/lower_if: conditionally lower if-branches based on their size
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
83d9b8a6f6
commit
e33440070a
|
@ -109,7 +109,7 @@ bool do_if_simplification(exec_list *instructions);
|
|||
bool opt_flatten_nested_if_blocks(exec_list *instructions);
|
||||
bool do_discard_simplification(exec_list *instructions);
|
||||
bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
|
||||
unsigned max_depth = 0);
|
||||
unsigned max_depth = 0, unsigned min_branch_cost = 0);
|
||||
bool do_mat_op_to_vec(exec_list *instructions);
|
||||
bool do_minmax_prune(exec_list *instructions);
|
||||
bool do_noop_swizzle(exec_list *instructions);
|
||||
|
|
|
@ -24,8 +24,14 @@
|
|||
/**
|
||||
* \file lower_if_to_cond_assign.cpp
|
||||
*
|
||||
* This attempts to flatten if-statements to conditional assignments for
|
||||
* GPUs with limited or no flow control support.
|
||||
* This flattens if-statements to conditional assignments if:
|
||||
*
|
||||
* - the GPU has limited or no flow control support
|
||||
* (controlled by max_depth)
|
||||
*
|
||||
* - small conditional branches are more expensive than conditional assignments
|
||||
* (controlled by min_branch_cost, that's the cost for a branch to be
|
||||
* preserved)
|
||||
*
|
||||
* It can't handle other control flow being inside of its block, such
|
||||
* as calls or loops. Hopefully loop unrolling and inlining will take
|
||||
|
@ -49,17 +55,20 @@
|
|||
#include "ir.h"
|
||||
#include "util/set.h"
|
||||
#include "util/hash_table.h" /* Needed for the hashing functions */
|
||||
#include "main/macros.h" /* for MAX2 */
|
||||
|
||||
namespace {
|
||||
|
||||
class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
ir_if_to_cond_assign_visitor(gl_shader_stage stage,
|
||||
unsigned max_depth)
|
||||
unsigned max_depth,
|
||||
unsigned min_branch_cost)
|
||||
{
|
||||
this->progress = false;
|
||||
this->stage = stage;
|
||||
this->max_depth = max_depth;
|
||||
this->min_branch_cost = min_branch_cost;
|
||||
this->depth = 0;
|
||||
|
||||
this->condition_variables =
|
||||
|
@ -76,8 +85,13 @@ public:
|
|||
ir_visitor_status visit_leave(ir_if *);
|
||||
|
||||
bool found_unsupported_op;
|
||||
bool found_expensive_op;
|
||||
bool is_then;
|
||||
bool progress;
|
||||
gl_shader_stage stage;
|
||||
unsigned then_cost;
|
||||
unsigned else_cost;
|
||||
unsigned min_branch_cost;
|
||||
unsigned max_depth;
|
||||
unsigned depth;
|
||||
|
||||
|
@ -88,12 +102,12 @@ public:
|
|||
|
||||
bool
|
||||
lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
|
||||
unsigned max_depth)
|
||||
unsigned max_depth, unsigned min_branch_cost)
|
||||
{
|
||||
if (max_depth == UINT_MAX)
|
||||
return false;
|
||||
|
||||
ir_if_to_cond_assign_visitor v(stage, max_depth);
|
||||
ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
|
||||
|
||||
visit_list_elements(&v, instructions);
|
||||
|
||||
|
@ -129,6 +143,20 @@ check_ir_node(ir_instruction *ir, void *data)
|
|||
break;
|
||||
}
|
||||
|
||||
/* SSBO, images, atomic counters are handled by ir_type_call */
|
||||
case ir_type_texture:
|
||||
v->found_expensive_op = true;
|
||||
break;
|
||||
|
||||
case ir_type_expression:
|
||||
case ir_type_dereference_array:
|
||||
case ir_type_dereference_record:
|
||||
if (v->is_then)
|
||||
v->then_cost++;
|
||||
else
|
||||
v->else_cost++;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -193,24 +221,39 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
|
|||
ir_visitor_status
|
||||
ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
|
||||
{
|
||||
bool must_lower = this->depth-- > this->max_depth;
|
||||
|
||||
/* Only flatten when beyond the GPU's maximum supported nesting depth. */
|
||||
if (this->depth-- <= this->max_depth)
|
||||
if (!must_lower && this->min_branch_cost == 0)
|
||||
return visit_continue;
|
||||
|
||||
this->found_unsupported_op = false;
|
||||
this->found_expensive_op = false;
|
||||
this->then_cost = 0;
|
||||
this->else_cost = 0;
|
||||
|
||||
ir_assignment *assign;
|
||||
|
||||
/* Check that both blocks don't contain anything we can't support. */
|
||||
this->is_then = true;
|
||||
foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
|
||||
visit_tree(then_ir, check_ir_node, this);
|
||||
}
|
||||
|
||||
this->is_then = false;
|
||||
foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
|
||||
visit_tree(else_ir, check_ir_node, this);
|
||||
}
|
||||
|
||||
if (this->found_unsupported_op)
|
||||
return visit_continue; /* can't handle inner unsupported opcodes */
|
||||
|
||||
/* Skip if the branch cost is high enough or if there's an expensive op. */
|
||||
if (!must_lower &&
|
||||
(this->found_expensive_op ||
|
||||
MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
|
||||
return visit_continue;
|
||||
|
||||
void *mem_ctx = ralloc_parent(ir);
|
||||
|
||||
/* Store the condition to a variable. Move all of the instructions from
|
||||
|
|
Loading…
Reference in New Issue