/* Source: mesa/src/compiler/nir/nir_opt_peephole_select.c (C, 330 lines, 10 KiB) */

/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jason Ekstrand (jason@jlekstrand.net)
*
*/
#include "nir.h"
#include "nir_control_flow.h"
// History note attached to the include below (commit dated
// 2019-11-01 21:52:38 +00:00):
//
// nir/opt_peephole_select: Don't count some unary operations
//
// In many cases, fsat, fneg, fabs, ineg, and iabs will get folded into
// another instruction as either source or destination modifiers. Counting
// them as instructions means that some if-statements won't get converted
// to selects. For example,
//
//     vec1 32 ssa_25 = flt32 ssa_0, ssa_23.x
//     /* succs: block_1 block_2 */
//     if ssa_25 {
//         block block_1:
//         /* preds: block_0 */
//         vec1 32 ssa_26 = fabs ssa_24
//         vec1 32 ssa_27 = fneg ssa_26
//         vec1 32 ssa_28 = fabs ssa_20
//         vec1 32 ssa_29 = fneg ssa_28
//         vec1 32 ssa_30 = fmul ssa_27, ssa_29
//         vec1 32 ssa_31 = fsat ssa_30
//         /* succs: block_3 */
//     } else {
//         block block_2:
//         /* preds: block_0 */
//         /* succs: block_3 */
//     }
//     block block_3:
//     /* preds: block_1 block_2 */
//
// block_1 isn't really 6 instructions, but it will be counted that way.
// Most callers of the peephole_select pass use either 1 or 8. It's very
// easy to blow way past either of these limits with things that are really
// only one or two actual instructions.
//
// I also tried some fancier things like making sure the fsat was of
// another SSA def from the same block, but the simple test was actually
// better.
//
// The i965 back-end SEL peephole pass still helps ~700 shaders in
// shader-db with this change.
//
// Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
// Reviewed-by: Matt Turner <mattst88@gmail.com>
//
// All Gen6+ platforms had similar results. (Ice Lake shown)
// total instructions in shared programs: 14743694 -> 14738910 (-0.03%)
// instructions in affected programs: 156575 -> 151791 (-3.06%)
// helped: 1204
// HURT: 0
// helped stats (abs) min: 1 max: 27 x̄: 3.97 x̃: 3
// helped stats (rel) min: 0.15% max: 19.57% x̄: 5.15% x̃: 4.55%
// 95% mean confidence interval for instructions value: -4.12 -3.82
// 95% mean confidence interval for instructions %-change: -5.35% -4.95%
// Instructions are helped.
//
// total cycles in shared programs: 231749141 -> 231602916 (-0.06%)
// cycles in affected programs: 2818975 -> 2672750 (-5.19%)
// helped: 876
// HURT: 322
// helped stats (abs) min: 2 max: 788 x̄: 180.99 x̃: 220
// helped stats (rel) min: <.01% max: 43.82% x̄: 20.75% x̃: 19.44%
// HURT stats (abs) min: 1 max: 1188 x̄: 38.27 x̃: 20
// HURT stats (rel) min: 0.09% max: 102.67% x̄: 5.17% x̃: 1.70%
// 95% mean confidence interval for cycles value: -130.47 -113.64
// 95% mean confidence interval for cycles %-change: -14.85% -12.72%
// Cycles are helped.
//
// total sends in shared programs: 730495 -> 730491 (<.01%)
// sends in affected programs: 46 -> 42 (-8.70%)
// helped: 2
// HURT: 0
//
// Iron Lake and GM45 had similar results. (Iron Lake shown)
// total instructions in shared programs: 8122757 -> 8122617 (<.01%)
// instructions in affected programs: 14716 -> 14576 (-0.95%)
// helped: 46
// HURT: 1
// helped stats (abs) min: 1 max: 8 x̄: 3.07 x̃: 3
// helped stats (rel) min: 0.36% max: 10.00% x̄: 2.54% x̃: 1.06%
// HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
// HURT stats (rel) min: 1.59% max: 1.59% x̄: 1.59% x̃: 1.59%
// 95% mean confidence interval for instructions value: -3.42 -2.54
// 95% mean confidence interval for instructions %-change: -3.28% -1.62%
// Instructions are helped.
//
// total cycles in shared programs: 188510100 -> 188509780 (<.01%)
// cycles in affected programs: 58994 -> 58674 (-0.54%)
// helped: 32
// HURT: 1
// helped stats (abs) min: 2 max: 96 x̄: 10.06 x̃: 6
// helped stats (rel) min: 0.05% max: 15.29% x̄: 1.37% x̃: 0.31%
// HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
// HURT stats (rel) min: 0.68% max: 0.68% x̄: 0.68% x̃: 0.68%
// 95% mean confidence interval for cycles value: -16.34 -3.06
// 95% mean confidence interval for cycles %-change: -2.46% -0.15%
// Cycles are helped.
#include "nir_search_helpers.h"
/*
 * Implements a small peephole optimization that looks for
 *
 *    if (cond) {
 *       <then SSA defs>
 *    } else {
 *       <else SSA defs>
 *    }
 *    phi
 *    ...
 *    phi
 *
 * and replaces it with:
 *
 *    <then SSA defs>
 *    <else SSA defs>
 *    bcsel
 *    ...
 *    bcsel
 *
 * where the SSA defs are ALU operations or other cheap instructions (not
 * texturing, for example).
 *
 * If the number of ALU operations in the branches is greater than the limit
 * parameter, then the optimization is skipped.  In limit=0 mode, the SSA defs
 * must only be MOVs which we expect to get copy-propagated away once they're
 * out of the inner blocks.
 */
/**
 * Decides whether every instruction in \p block belongs to the set this pass
 * is willing to hoist out of an if-statement arm.
 *
 * \param block             block to inspect (one arm of the if-statement)
 * \param count             running tally of ALU instructions that count
 *                          against the caller's limit; only incremented when
 *                          \p alu_ok is true, and never reset here so the
 *                          caller can accumulate over both arms
 * \param alu_ok            when false, only move-like ALU ops are accepted
 *                          and their results may only feed phis in the
 *                          block's first successor
 * \param indirect_load_ok  when false, a load_deref with an indirect deref
 *                          chain rejects the block
 * \param expensive_alu_ok  when false, the ALU ops listed as expensive below
 *                          (division, transcendental functions, ...) reject
 *                          the block
 *
 * \return true if all instructions are allowed, false as soon as one is not.
 *         \p count may already have been advanced when false is returned.
 */
static bool
block_check_for_allowed_instrs(nir_block *block, unsigned *count,
                               bool alu_ok, bool indirect_load_ok,
                               bool expensive_alu_ok)
{
   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref: {
            nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]);

            switch (deref->mode) {
            case nir_var_shader_in:
            case nir_var_uniform:
               /* Don't try to remove flow control around an indirect load
                * because that flow control may be trying to avoid invalid
                * loads.
                */
               if (!indirect_load_ok && nir_deref_instr_has_indirect(deref))
                  return false;

               break;

            default:
               /* Loads from any other variable mode are never hoisted. */
               return false;
            }
            break;
         }

         case nir_intrinsic_load_uniform:
            /* Only accepted when general ALU work is accepted too. */
            if (!alu_ok)
               return false;

            break;

         default:
            return false;
         }

         break;
      }

      case nir_instr_type_deref:
      case nir_instr_type_load_const:
         /* Always allowed and never counted. */
         break;

      case nir_instr_type_alu: {
         nir_alu_instr *mov = nir_instr_as_alu(instr);
         bool movelike = false;

         switch (mov->op) {
         case nir_op_mov:
         case nir_op_fneg:
         case nir_op_ineg:
         case nir_op_fabs:
         case nir_op_iabs:
         case nir_op_vec2:
         case nir_op_vec3:
         case nir_op_vec4:
         case nir_op_vec8:
         case nir_op_vec16:
            movelike = true;
            break;

         case nir_op_fcos:
         case nir_op_fdiv:
         case nir_op_fexp2:
         case nir_op_flog2:
         case nir_op_fmod:
         case nir_op_fpow:
         case nir_op_frcp:
         case nir_op_frem:
         case nir_op_frsq:
         case nir_op_fsin:
         case nir_op_idiv:
         case nir_op_irem:
         case nir_op_udiv:
            /* Expensive operations need both permissions. */
            if (!alu_ok || !expensive_alu_ok)
               return false;

            break;

         default:
            if (!alu_ok) {
               /* It must be a move-like operation. */
               return false;
            }
            break;
         }

         /* It must be SSA */
         if (!mov->dest.dest.is_ssa)
            return false;

         if (alu_ok) {
            /* If the ALU operation is an fsat or a move-like operation, do
             * not count it.  The expectation is that it will eventually be
             * merged as a destination modifier or source modifier on some
             * other instruction.
             */
            if (mov->op != nir_op_fsat && !movelike)
               (*count)++;
         } else {
            /* Can't handle saturate */
            if (mov->dest.saturate)
               return false;

            /* It cannot have any if-uses */
            if (!list_is_empty(&mov->dest.dest.ssa.if_uses))
               return false;

            /* The only uses of this definition must be phis in the successor */
            nir_foreach_use(use, &mov->dest.dest.ssa) {
               if (use->parent_instr->type != nir_instr_type_phi ||
                   use->parent_instr->block != block->successors[0])
                  return false;
            }
         }
         break;
      }

      default:
         return false;
      }
   }

   return true;
}
/**
 * Attempts to flatten the if-statement that immediately precedes \p block.
 *
 * When the preceding CF node is an if whose then- and else-sides are each a
 * single block holding only allowed instructions, those instructions are
 * appended to the block in front of the if, every leading phi in \p block is
 * replaced by a bcsel on the if condition, and the if node is removed.
 *
 * \param block             block that follows the candidate if-statement
 * \param shader            shader used to allocate the new bcsel instructions
 * \param limit             maximum number of counted ALU instructions over
 *                          both arms; 0 restricts the arms to move-like ops
 * \param indirect_load_ok  passed through to block_check_for_allowed_instrs()
 * \param expensive_alu_ok  passed through to block_check_for_allowed_instrs()
 *
 * \return true if the if-statement was replaced, false if nothing changed.
 */
static bool
nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
                              unsigned limit, bool indirect_load_ok,
                              bool expensive_alu_ok)
{
   /* The candidate is whatever CF node sits right in front of this block. */
   if (nir_cf_node_is_first(&block->cf_node))
      return false;

   nir_cf_node *const before = nir_cf_node_prev(&block->cf_node);
   if (before->type != nir_cf_node_if)
      return false;

   nir_if *const nif = nir_cf_node_as_if(before);
   if (nif->control == nir_selection_control_dont_flatten)
      return false;

   const bool forced = nif->control == nir_selection_control_flatten;

   nir_block *const then_arm = nir_if_first_then_block(nif);
   nir_block *const else_arm = nir_if_first_else_block(nif);

   /* Each side must consist of exactly one block ... */
   if (then_arm != nir_if_last_then_block(nif))
      return false;
   if (else_arm != nir_if_last_else_block(nif))
      return false;

   if (forced) {
      /* An explicit flatten request overrides the driver defaults. */
      indirect_load_ok = true;
      expensive_alu_ok = true;
   }

   /* ... and may contain nothing but "allowed" instructions. */
   const bool alu_ok = limit != 0;
   unsigned alu_count = 0;

   if (!block_check_for_allowed_instrs(then_arm, &alu_count, alu_ok,
                                       indirect_load_ok, expensive_alu_ok))
      return false;
   if (!block_check_for_allowed_instrs(else_arm, &alu_count, alu_ok,
                                       indirect_load_ok, expensive_alu_ok))
      return false;

   /* The instruction budget only applies when flattening is not forced. */
   if (!forced && alu_count > limit)
      return false;

   /* From here on the transformation is committed: the if-statement goes
    * away and the phis following it become selects.
    */
   nir_block *const dest_block =
      nir_cf_node_as_block(nir_cf_node_prev(before));

   /* Hoist the contents of both arms, then-side first, to the tail of the
    * block in front of the if.  block_check_for_allowed_instrs() has already
    * established that this is safe.
    */
   nir_block *const arms[2] = { then_arm, else_arm };
   for (unsigned a = 0; a < 2; a++) {
      nir_block *const arm = arms[a];

      nir_foreach_instr_safe(instr, arm) {
         exec_node_remove(&instr->node);
         instr->block = dest_block;
         exec_list_push_tail(&dest_block->instr_list, &instr->node);
      }
   }

   /* Turn every phi at the head of the block into a bcsel. */
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *const phi = nir_instr_as_phi(instr);
      nir_alu_instr *const bcsel = nir_alu_instr_create(shader, nir_op_bcsel);

      nir_src_copy(&bcsel->src[0].src, &nif->condition, bcsel);
      /* Splat the condition to all channels */
      memset(bcsel->src[0].swizzle, 0, sizeof bcsel->src[0].swizzle);

      assert(exec_list_length(&phi->srcs) == 2);
      nir_foreach_phi_src(phi_src, phi) {
         assert(phi_src->pred == then_arm || phi_src->pred == else_arm);
         assert(phi_src->src.is_ssa);

         /* Operand 1 carries the then-value, operand 2 the else-value. */
         unsigned operand;
         if (phi_src->pred == then_arm)
            operand = 1;
         else
            operand = 2;

         nir_src_copy(&bcsel->src[operand].src, &phi_src->src, bcsel);
      }

      nir_ssa_dest_init(&bcsel->instr, &bcsel->dest.dest,
                        phi->dest.ssa.num_components,
                        phi->dest.ssa.bit_size, phi->dest.ssa.name);
      bcsel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;

      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
                               nir_src_for_ssa(&bcsel->dest.dest.ssa));

      nir_instr_insert_before(&phi->instr, &bcsel->instr);
      nir_instr_remove(&phi->instr);
   }

   nir_cf_node_remove(&nif->cf_node);
   return true;
}
/**
 * Runs the peephole-select transformation over every block of \p impl.
 *
 * \return true if at least one if-statement was replaced.  In that case no
 *         metadata is preserved; otherwise all metadata is preserved.
 */
static bool
nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
                             bool indirect_load_ok, bool expensive_alu_ok)
{
   nir_shader *const owner = impl->function->shader;
   bool changed = false;

   nir_foreach_block_safe(blk, impl) {
      if (nir_opt_peephole_select_block(blk, owner, limit,
                                        indirect_load_ok, expensive_alu_ok))
         changed = true;
   }

   nir_metadata_preserve(impl, changed ? nir_metadata_none
                                       : nir_metadata_all);

   return changed;
}
/**
 * Pass entry point: applies the peephole-select optimization to every
 * function in \p shader that has an implementation.
 *
 * \param shader            shader to optimize
 * \param limit             per-if budget of counted ALU instructions
 * \param indirect_load_ok  forwarded to the per-implementation worker
 * \param expensive_alu_ok  forwarded to the per-implementation worker
 *
 * \return true if any function implementation was changed.
 */
bool
nir_opt_peephole_select(nir_shader *shader, unsigned limit,
                        bool indirect_load_ok, bool expensive_alu_ok)
{
   bool any_changed = false;

   nir_foreach_function(func, shader) {
      /* Functions without an implementation have nothing to optimize. */
      if (!func->impl)
         continue;

      if (nir_opt_peephole_select_impl(func->impl, limit,
                                       indirect_load_ok, expensive_alu_ok))
         any_changed = true;
   }

   return any_changed;
}