2016-09-15 06:49:57 +01:00
|
|
|
/*
|
|
|
|
* Copyright © 2016 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
* DEALINGS IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "nir.h"
|
|
|
|
#include "nir_builder.h"
|
|
|
|
#include "nir_control_flow.h"
|
|
|
|
#include "nir_loop_analyze.h"
|
|
|
|
|
2017-02-08 22:20:41 +00:00
|
|
|
|
|
|
|
/* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32
|
|
|
|
* instructions. (Multiply counting nodes and magic number 5.) But there is
|
|
|
|
* no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
|
|
|
|
* to give about the same results. Around 5 instructions per node. But some
|
|
|
|
* loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
|
|
|
|
* we set it to 26.
|
|
|
|
*/
|
2018-03-06 03:48:07 +00:00
|
|
|
#define LOOP_UNROLL_LIMIT 26
|
2017-02-08 22:20:41 +00:00
|
|
|
|
2016-09-15 06:49:57 +01:00
|
|
|
/* Prepare this loop for unrolling by first converting to lcssa and then
|
2018-03-26 01:41:51 +01:00
|
|
|
* converting the phis from the top level of the loop body to regs.
|
|
|
|
* Partially converting out of SSA allows us to unroll the loop without having
|
|
|
|
* to keep track of and update phis along the way which gets tricky and
|
|
|
|
* doesn't add much value over converting to regs.
|
2016-09-15 06:49:57 +01:00
|
|
|
*
|
2019-08-19 16:05:39 +01:00
|
|
|
* The loop may have a jump instruction at the end of the loop which does
|
2016-09-15 06:49:57 +01:00
|
|
|
* nothing. Once we're out of SSA, we can safely delete it so we don't have
|
|
|
|
* to deal with it later.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
loop_prepare_for_unroll(nir_loop *loop)
|
|
|
|
{
|
2018-09-11 18:51:09 +01:00
|
|
|
nir_rematerialize_derefs_in_use_blocks_impl(
|
|
|
|
nir_cf_node_get_function(&loop->cf_node));
|
|
|
|
|
2016-09-15 06:49:57 +01:00
|
|
|
nir_convert_loop_to_lcssa(loop);
|
|
|
|
|
2018-03-26 01:41:51 +01:00
|
|
|
/* Lower phis at the top level of the loop body */
|
|
|
|
foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
|
|
|
|
if (nir_cf_node_block == node->type) {
|
|
|
|
nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
|
|
|
|
}
|
|
|
|
}
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2018-03-26 01:41:51 +01:00
|
|
|
/* Lower phis after the loop */
|
2016-09-15 06:49:57 +01:00
|
|
|
nir_block *block_after_loop =
|
|
|
|
nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
|
|
|
|
|
|
|
|
nir_lower_phis_to_regs_block(block_after_loop);
|
|
|
|
|
2019-08-19 16:05:39 +01:00
|
|
|
/* Remove jump if it's the last instruction in the loop */
|
2016-09-15 06:49:57 +01:00
|
|
|
nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
|
|
|
|
if (last_instr && last_instr->type == nir_instr_type_jump) {
|
|
|
|
nir_instr_remove(last_instr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
get_first_blocks_in_terminator(nir_loop_terminator *term,
|
|
|
|
nir_block **first_break_block,
|
|
|
|
nir_block **first_continue_block)
|
|
|
|
{
|
|
|
|
if (term->continue_from_then) {
|
|
|
|
*first_continue_block = nir_if_first_then_block(term->nif);
|
|
|
|
*first_break_block = nir_if_first_else_block(term->nif);
|
|
|
|
} else {
|
|
|
|
*first_continue_block = nir_if_first_else_block(term->nif);
|
|
|
|
*first_break_block = nir_if_first_then_block(term->nif);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Unroll a loop where we know exactly how many iterations there are and there
|
|
|
|
* is only a single exit point. Note here we can unroll loops with multiple
|
|
|
|
* theoretical exits that only have a single terminating exit that we always
|
|
|
|
* know is the "real" exit.
|
|
|
|
*
|
|
|
|
* loop {
|
|
|
|
* ...instrs...
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* And the iteration count is 3, the output will be:
|
|
|
|
*
|
|
|
|
* ...instrs... ...instrs... ...instrs...
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
simple_unroll(nir_loop *loop)
|
|
|
|
{
|
|
|
|
nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
|
|
|
|
assert(nir_is_trivial_loop_if(limiting_term->nif,
|
|
|
|
limiting_term->break_block));
|
|
|
|
|
|
|
|
loop_prepare_for_unroll(loop);
|
|
|
|
|
|
|
|
/* Skip over loop terminator and get the loop body. */
|
|
|
|
list_for_each_entry(nir_loop_terminator, terminator,
|
|
|
|
&loop->info->loop_terminator_list,
|
|
|
|
loop_terminator_link) {
|
|
|
|
|
|
|
|
/* Remove all but the limiting terminator as we know the other exit
|
|
|
|
* conditions can never be met. Note we need to extract any instructions
|
|
|
|
* in the continue from branch and insert then into the loop body before
|
|
|
|
* removing it.
|
|
|
|
*/
|
|
|
|
if (terminator->nif != limiting_term->nif) {
|
|
|
|
nir_block *first_break_block;
|
|
|
|
nir_block *first_continue_block;
|
|
|
|
get_first_blocks_in_terminator(terminator, &first_break_block,
|
|
|
|
&first_continue_block);
|
|
|
|
|
|
|
|
assert(nir_is_trivial_loop_if(terminator->nif,
|
|
|
|
terminator->break_block));
|
|
|
|
|
|
|
|
nir_cf_list continue_from_lst;
|
|
|
|
nir_cf_extract(&continue_from_lst,
|
|
|
|
nir_before_block(first_continue_block),
|
|
|
|
nir_after_block(terminator->continue_from_block));
|
|
|
|
nir_cf_reinsert(&continue_from_lst,
|
|
|
|
nir_after_cf_node(&terminator->nif->cf_node));
|
|
|
|
|
|
|
|
nir_cf_node_remove(&terminator->nif->cf_node);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nir_block *first_break_block;
|
|
|
|
nir_block *first_continue_block;
|
|
|
|
get_first_blocks_in_terminator(limiting_term, &first_break_block,
|
|
|
|
&first_continue_block);
|
|
|
|
|
|
|
|
/* Pluck out the loop header */
|
|
|
|
nir_block *header_blk = nir_loop_first_block(loop);
|
|
|
|
nir_cf_list lp_header;
|
|
|
|
nir_cf_extract(&lp_header, nir_before_block(header_blk),
|
|
|
|
nir_before_cf_node(&limiting_term->nif->cf_node));
|
|
|
|
|
|
|
|
/* Add the continue from block of the limiting terminator to the loop body
|
|
|
|
*/
|
|
|
|
nir_cf_list continue_from_lst;
|
|
|
|
nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
|
|
|
|
nir_after_block(limiting_term->continue_from_block));
|
|
|
|
nir_cf_reinsert(&continue_from_lst,
|
|
|
|
nir_after_cf_node(&limiting_term->nif->cf_node));
|
|
|
|
|
|
|
|
/* Pluck out the loop body */
|
|
|
|
nir_cf_list loop_body;
|
|
|
|
nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
|
|
|
|
nir_after_block(nir_loop_last_block(loop)));
|
|
|
|
|
2019-01-11 19:50:53 +00:00
|
|
|
struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2018-11-16 03:58:03 +00:00
|
|
|
/* Clone the loop header and insert before the loop */
|
|
|
|
nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
|
|
|
|
nir_before_cf_node(&loop->cf_node),
|
|
|
|
remap_table);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2018-11-20 00:35:37 +00:00
|
|
|
for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
|
2018-11-16 03:58:03 +00:00
|
|
|
/* Clone loop body and insert before the loop */
|
|
|
|
nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
|
|
|
|
nir_before_cf_node(&loop->cf_node),
|
|
|
|
remap_table);
|
|
|
|
|
|
|
|
/* Clone loop header and insert after loop body */
|
|
|
|
nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
|
|
|
|
nir_before_cf_node(&loop->cf_node),
|
|
|
|
remap_table);
|
2016-09-15 06:49:57 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove the break from the loop terminator and add instructions from
|
|
|
|
* the break block after the unrolled loop.
|
|
|
|
*/
|
|
|
|
nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
|
|
|
|
nir_instr_remove(break_instr);
|
|
|
|
nir_cf_list break_list;
|
|
|
|
nir_cf_extract(&break_list, nir_before_block(first_break_block),
|
|
|
|
nir_after_block(limiting_term->break_block));
|
|
|
|
|
|
|
|
/* Clone so things get properly remapped */
|
2018-11-16 03:58:03 +00:00
|
|
|
nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
|
|
|
|
nir_before_cf_node(&loop->cf_node),
|
|
|
|
remap_table);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
|
|
|
/* Remove the loop */
|
|
|
|
nir_cf_node_remove(&loop->cf_node);
|
|
|
|
|
|
|
|
/* Delete the original loop body, break block & header */
|
|
|
|
nir_cf_delete(&lp_header);
|
|
|
|
nir_cf_delete(&loop_body);
|
|
|
|
nir_cf_delete(&break_list);
|
|
|
|
|
|
|
|
_mesa_hash_table_destroy(remap_table, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
|
|
|
|
{
|
|
|
|
/* Move the rest of the loop inside the continue-from-block */
|
|
|
|
nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
|
|
|
|
|
|
|
|
/* Remove the break */
|
|
|
|
nir_instr_remove(nir_block_last_instr(term->break_block));
|
|
|
|
}
|
|
|
|
|
|
|
|
static nir_cursor
|
|
|
|
get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
|
|
|
|
{
|
|
|
|
if (node->type == nir_cf_node_loop) {
|
|
|
|
return nir_before_cf_node(node);
|
|
|
|
} else {
|
|
|
|
nir_if *if_stmt = nir_cf_node_as_if(node);
|
|
|
|
if (continue_from_then) {
|
|
|
|
return nir_after_block(nir_if_last_then_block(if_stmt));
|
|
|
|
} else {
|
|
|
|
return nir_after_block(nir_if_last_else_block(if_stmt));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-15 08:51:20 +00:00
|
|
|
static nir_cf_node *
|
|
|
|
complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
|
|
|
|
nir_cf_list *lp_header, nir_cf_list *lp_body,
|
|
|
|
struct hash_table *remap_table,
|
|
|
|
unsigned num_times_to_clone)
|
|
|
|
{
|
|
|
|
/* In the terminator that we have no trip count for move everything after
|
|
|
|
* the terminator into the continue from branch.
|
|
|
|
*/
|
|
|
|
nir_cf_list loop_end;
|
|
|
|
nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
|
|
|
|
nir_after_block(nir_loop_last_block(loop)));
|
|
|
|
move_cf_list_into_loop_term(&loop_end, unlimit_term);
|
|
|
|
|
|
|
|
/* Pluck out the loop body. */
|
|
|
|
nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
|
|
|
|
nir_after_block(nir_loop_last_block(loop)));
|
|
|
|
|
|
|
|
/* Set unroll_loc to the loop as we will insert the unrolled loop before it
|
|
|
|
*/
|
|
|
|
nir_cf_node *unroll_loc = &loop->cf_node;
|
|
|
|
|
|
|
|
/* Temp list to store the cloned loop as we unroll */
|
|
|
|
nir_cf_list unrolled_lp_body;
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < num_times_to_clone; i++) {
|
|
|
|
|
|
|
|
nir_cursor cursor =
|
|
|
|
get_complex_unroll_insert_location(unroll_loc,
|
|
|
|
unlimit_term->continue_from_then);
|
|
|
|
|
|
|
|
/* Clone loop header and insert in if branch */
|
|
|
|
nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
|
|
|
|
cursor, remap_table);
|
|
|
|
|
|
|
|
cursor =
|
|
|
|
get_complex_unroll_insert_location(unroll_loc,
|
|
|
|
unlimit_term->continue_from_then);
|
|
|
|
|
|
|
|
/* Clone loop body */
|
|
|
|
nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
|
|
|
|
remap_table);
|
|
|
|
|
|
|
|
unroll_loc = exec_node_data(nir_cf_node,
|
|
|
|
exec_list_get_tail(&unrolled_lp_body.list),
|
|
|
|
node);
|
|
|
|
assert(unroll_loc->type == nir_cf_node_block &&
|
|
|
|
exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
|
|
|
|
|
|
|
|
/* Get the unrolled if node */
|
|
|
|
unroll_loc = nir_cf_node_prev(unroll_loc);
|
|
|
|
|
|
|
|
/* Insert unrolled loop body */
|
|
|
|
nir_cf_reinsert(&unrolled_lp_body, cursor);
|
|
|
|
}
|
|
|
|
|
|
|
|
return unroll_loc;
|
|
|
|
}
|
|
|
|
|
2016-09-15 06:49:57 +01:00
|
|
|
/**
|
|
|
|
* Unroll a loop with two exists when the trip count of one of the exits is
|
|
|
|
* unknown. If continue_from_then is true, the loop is repeated only when the
|
|
|
|
* "then" branch of the if is taken; otherwise it is repeated only
|
|
|
|
* when the "else" branch of the if is taken.
|
|
|
|
*
|
|
|
|
* For example, if the input is:
|
|
|
|
*
|
|
|
|
* loop {
|
|
|
|
* ...phis/condition...
|
|
|
|
* if condition {
|
|
|
|
* ...then instructions...
|
|
|
|
* } else {
|
|
|
|
* ...continue instructions...
|
|
|
|
* break
|
|
|
|
* }
|
|
|
|
* ...body...
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* And the iteration count is 3, and unlimit_term->continue_from_then is true,
|
|
|
|
* then the output will be:
|
|
|
|
*
|
|
|
|
* ...condition...
|
|
|
|
* if condition {
|
|
|
|
* ...then instructions...
|
|
|
|
* ...body...
|
|
|
|
* if condition {
|
|
|
|
* ...then instructions...
|
|
|
|
* ...body...
|
|
|
|
* if condition {
|
|
|
|
* ...then instructions...
|
|
|
|
* ...body...
|
|
|
|
* } else {
|
|
|
|
* ...continue instructions...
|
|
|
|
* }
|
|
|
|
* } else {
|
|
|
|
* ...continue instructions...
|
|
|
|
* }
|
|
|
|
* } else {
|
|
|
|
* ...continue instructions...
|
|
|
|
* }
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
|
|
|
|
bool limiting_term_second)
|
|
|
|
{
|
|
|
|
assert(nir_is_trivial_loop_if(unlimit_term->nif,
|
|
|
|
unlimit_term->break_block));
|
|
|
|
|
|
|
|
nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
|
|
|
|
assert(nir_is_trivial_loop_if(limiting_term->nif,
|
|
|
|
limiting_term->break_block));
|
|
|
|
|
|
|
|
loop_prepare_for_unroll(loop);
|
|
|
|
|
|
|
|
nir_block *header_blk = nir_loop_first_block(loop);
|
|
|
|
|
|
|
|
nir_cf_list lp_header;
|
|
|
|
nir_cf_list limit_break_list;
|
|
|
|
unsigned num_times_to_clone;
|
|
|
|
if (limiting_term_second) {
|
|
|
|
/* Pluck out the loop header */
|
|
|
|
nir_cf_extract(&lp_header, nir_before_block(header_blk),
|
|
|
|
nir_before_cf_node(&unlimit_term->nif->cf_node));
|
|
|
|
|
|
|
|
/* We need some special handling when its the second terminator causing
|
|
|
|
* us to exit the loop for example:
|
|
|
|
*
|
|
|
|
* for (int i = 0; i < uniform_lp_count; i++) {
|
|
|
|
* colour = vec4(0.0, 1.0, 0.0, 1.0);
|
|
|
|
*
|
|
|
|
* if (i == 1) {
|
|
|
|
* break;
|
|
|
|
* }
|
|
|
|
* ... any further code is unreachable after i == 1 ...
|
|
|
|
* }
|
|
|
|
*/
|
|
|
|
nir_cf_list after_lt;
|
|
|
|
nir_if *limit_if = limiting_term->nif;
|
|
|
|
nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
|
|
|
|
nir_after_block(nir_loop_last_block(loop)));
|
|
|
|
move_cf_list_into_loop_term(&after_lt, limiting_term);
|
|
|
|
|
|
|
|
/* Because the trip count is the number of times we pass over the entire
|
|
|
|
* loop before hitting a break when the second terminator is the
|
|
|
|
* limiting terminator we can actually execute code inside the loop when
|
|
|
|
* trip count == 0 e.g. the code above the break. So we need to bump
|
|
|
|
* the trip_count in order for the code below to clone anything. When
|
|
|
|
* trip count == 1 we execute the code above the break twice and the
|
|
|
|
* code below it once so we need clone things twice and so on.
|
|
|
|
*/
|
2018-11-20 00:35:37 +00:00
|
|
|
num_times_to_clone = loop->info->max_trip_count + 1;
|
2016-09-15 06:49:57 +01:00
|
|
|
} else {
|
|
|
|
/* Pluck out the loop header */
|
|
|
|
nir_cf_extract(&lp_header, nir_before_block(header_blk),
|
|
|
|
nir_before_cf_node(&limiting_term->nif->cf_node));
|
|
|
|
|
|
|
|
nir_block *first_break_block;
|
|
|
|
nir_block *first_continue_block;
|
|
|
|
get_first_blocks_in_terminator(limiting_term, &first_break_block,
|
|
|
|
&first_continue_block);
|
|
|
|
|
|
|
|
/* Remove the break then extract instructions from the break block so we
|
|
|
|
* can insert them in the innermost else of the unrolled loop.
|
|
|
|
*/
|
|
|
|
nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
|
|
|
|
nir_instr_remove(break_instr);
|
|
|
|
nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
|
|
|
|
nir_after_block(limiting_term->break_block));
|
|
|
|
|
|
|
|
nir_cf_list continue_list;
|
|
|
|
nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
|
|
|
|
nir_after_block(limiting_term->continue_from_block));
|
|
|
|
|
|
|
|
nir_cf_reinsert(&continue_list,
|
|
|
|
nir_after_cf_node(&limiting_term->nif->cf_node));
|
|
|
|
|
|
|
|
nir_cf_node_remove(&limiting_term->nif->cf_node);
|
|
|
|
|
2018-11-20 00:35:37 +00:00
|
|
|
num_times_to_clone = loop->info->max_trip_count;
|
2016-09-15 06:49:57 +01:00
|
|
|
}
|
|
|
|
|
2019-01-11 19:50:53 +00:00
|
|
|
struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2018-11-15 08:51:20 +00:00
|
|
|
nir_cf_list lp_body;
|
|
|
|
nir_cf_node *unroll_loc =
|
|
|
|
complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
|
|
|
|
remap_table, num_times_to_clone);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
|
|
|
if (!limiting_term_second) {
|
|
|
|
assert(unroll_loc->type == nir_cf_node_if);
|
|
|
|
|
|
|
|
nir_cursor cursor =
|
|
|
|
get_complex_unroll_insert_location(unroll_loc,
|
|
|
|
unlimit_term->continue_from_then);
|
|
|
|
|
2018-11-16 03:58:03 +00:00
|
|
|
/* Clone loop header and insert in if branch */
|
|
|
|
nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
|
|
|
|
cursor, remap_table);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
|
|
|
cursor =
|
|
|
|
get_complex_unroll_insert_location(unroll_loc,
|
|
|
|
unlimit_term->continue_from_then);
|
|
|
|
|
2018-11-16 03:58:03 +00:00
|
|
|
/* Clone so things get properly remapped, and insert break block from
|
|
|
|
* the limiting terminator.
|
|
|
|
*/
|
|
|
|
nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
|
|
|
|
cursor, remap_table);
|
|
|
|
|
2016-09-15 06:49:57 +01:00
|
|
|
nir_cf_delete(&limit_break_list);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The loop has been unrolled so remove it. */
|
|
|
|
nir_cf_node_remove(&loop->cf_node);
|
|
|
|
|
|
|
|
/* Delete the original loop header and body */
|
|
|
|
nir_cf_delete(&lp_header);
|
2018-11-15 08:51:20 +00:00
|
|
|
nir_cf_delete(&lp_body);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
|
|
|
_mesa_hash_table_destroy(remap_table, NULL);
|
|
|
|
}
|
|
|
|
|
2018-11-20 04:23:45 +00:00
|
|
|
/**
|
|
|
|
* Unroll loops where we only have a single terminator but the exact trip
|
|
|
|
* count is unknown. For example:
|
|
|
|
*
|
|
|
|
* for (int i = 0; i < imin(x, 4); i++)
|
|
|
|
* ...
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
complex_unroll_single_terminator(nir_loop *loop)
|
|
|
|
{
|
|
|
|
assert(list_length(&loop->info->loop_terminator_list) == 1);
|
|
|
|
assert(loop->info->limiting_terminator);
|
|
|
|
assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
|
|
|
|
loop->info->limiting_terminator->break_block));
|
|
|
|
|
|
|
|
nir_loop_terminator *terminator = loop->info->limiting_terminator;
|
|
|
|
|
|
|
|
loop_prepare_for_unroll(loop);
|
|
|
|
|
|
|
|
/* Pluck out the loop header */
|
|
|
|
nir_cf_list lp_header;
|
|
|
|
nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
|
|
|
|
nir_before_cf_node(&terminator->nif->cf_node));
|
|
|
|
|
|
|
|
struct hash_table *remap_table =
|
|
|
|
_mesa_hash_table_create(NULL, _mesa_hash_pointer,
|
|
|
|
_mesa_key_pointer_equal);
|
|
|
|
|
|
|
|
/* We need to clone the loop one extra time in order to clone the lcssa
|
|
|
|
* vars for the last iteration (they are inside the following ifs break
|
|
|
|
* branch). We leave other passes to clean up this redundant if.
|
|
|
|
*/
|
|
|
|
unsigned num_times_to_clone = loop->info->max_trip_count + 1;
|
|
|
|
|
|
|
|
nir_cf_list lp_body;
|
2019-06-22 15:21:44 +01:00
|
|
|
UNUSED nir_cf_node *unroll_loc =
|
2018-11-20 04:23:45 +00:00
|
|
|
complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
|
|
|
|
remap_table, num_times_to_clone);
|
|
|
|
|
|
|
|
/* Delete the original loop header and body */
|
|
|
|
nir_cf_delete(&lp_header);
|
|
|
|
nir_cf_delete(&lp_body);
|
|
|
|
|
|
|
|
/* The original loop has been replaced so remove it. */
|
|
|
|
nir_cf_node_remove(&loop->cf_node);
|
|
|
|
|
|
|
|
_mesa_hash_table_destroy(remap_table, NULL);
|
|
|
|
}
|
|
|
|
|
2018-07-07 08:56:26 +01:00
|
|
|
/* Unrolls the classic wrapper loops e.g
|
|
|
|
*
|
|
|
|
* do {
|
|
|
|
* // ...
|
|
|
|
* } while (false)
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
wrapper_unroll(nir_loop *loop)
|
|
|
|
{
|
2019-10-28 10:27:52 +00:00
|
|
|
if (!list_is_empty(&loop->info->loop_terminator_list)) {
|
2018-07-07 08:56:26 +01:00
|
|
|
|
2018-07-11 01:53:41 +01:00
|
|
|
/* Unrolling a loop with a large number of exits can result in a
|
|
|
|
* large inrease in register pressure. For now we just skip
|
|
|
|
* unrolling if we have more than 3 exits (not including the break
|
|
|
|
* at the end of the loop).
|
|
|
|
*
|
|
|
|
* TODO: Most loops that fit this pattern are simply switch
|
|
|
|
* statements that are converted to a loop to take advantage of
|
|
|
|
* exiting jump instruction handling. In this case we could make
|
|
|
|
* use of a binary seach pattern like we do in
|
|
|
|
* nir_lower_indirect_derefs(), this should allow us to unroll the
|
|
|
|
* loops in an optimal way and should also avoid some of the
|
|
|
|
* register pressure that comes from simply nesting the
|
|
|
|
* terminators one after the other.
|
|
|
|
*/
|
|
|
|
if (list_length(&loop->info->loop_terminator_list) > 3)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
loop_prepare_for_unroll(loop);
|
|
|
|
|
|
|
|
nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
|
|
|
|
list_for_each_entry(nir_loop_terminator, terminator,
|
|
|
|
&loop->info->loop_terminator_list,
|
|
|
|
loop_terminator_link) {
|
|
|
|
|
|
|
|
/* Remove break from the terminator */
|
|
|
|
nir_instr *break_instr =
|
|
|
|
nir_block_last_instr(terminator->break_block);
|
|
|
|
nir_instr_remove(break_instr);
|
|
|
|
|
|
|
|
/* Pluck out the loop body. */
|
|
|
|
nir_cf_list loop_body;
|
|
|
|
nir_cf_extract(&loop_body,
|
|
|
|
nir_after_cf_node(&terminator->nif->cf_node),
|
|
|
|
loop_end);
|
|
|
|
|
|
|
|
/* Reinsert loop body into continue from block */
|
|
|
|
nir_cf_reinsert(&loop_body,
|
|
|
|
nir_after_block(terminator->continue_from_block));
|
|
|
|
|
|
|
|
loop_end = terminator->continue_from_then ?
|
|
|
|
nir_after_block(nir_if_last_then_block(terminator->nif)) :
|
|
|
|
nir_after_block(nir_if_last_else_block(terminator->nif));
|
|
|
|
}
|
|
|
|
} else {
|
2019-08-19 16:13:12 +01:00
|
|
|
loop_prepare_for_unroll(loop);
|
2018-07-11 01:53:41 +01:00
|
|
|
}
|
2018-07-07 08:56:26 +01:00
|
|
|
|
2018-07-11 01:53:41 +01:00
|
|
|
/* Pluck out the loop body. */
|
|
|
|
nir_cf_list loop_body;
|
|
|
|
nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
|
|
|
|
nir_after_block(nir_loop_last_block(loop)));
|
2018-07-07 08:56:26 +01:00
|
|
|
|
2018-07-11 01:53:41 +01:00
|
|
|
/* Reinsert loop body after the loop */
|
|
|
|
nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
|
2018-07-07 08:56:26 +01:00
|
|
|
|
2018-07-11 01:53:41 +01:00
|
|
|
/* The loop has been unrolled so remove it. */
|
|
|
|
nir_cf_node_remove(&loop->cf_node);
|
2018-07-07 08:56:26 +01:00
|
|
|
|
2018-07-11 01:53:41 +01:00
|
|
|
return true;
|
2018-07-07 08:56:26 +01:00
|
|
|
}
|
|
|
|
|
2018-11-20 03:05:09 +00:00
|
|
|
static bool
|
|
|
|
is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
|
|
|
|
unsigned trip_count)
|
|
|
|
{
|
|
|
|
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
|
|
|
|
if (d->deref_type != nir_deref_type_array)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
|
|
|
|
nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
|
|
|
|
if (!nir_srcs_equal(d->arr.index, src))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_deref_instr *parent = nir_deref_instr_parent(d);
|
|
|
|
assert(glsl_type_is_array(parent->type) ||
|
2020-08-17 13:19:43 +01:00
|
|
|
glsl_type_is_matrix(parent->type) ||
|
|
|
|
glsl_type_is_vector(parent->type));
|
2018-11-20 03:05:09 +00:00
|
|
|
|
|
|
|
/* We have already unrolled the loop and the new one will be imbedded in
|
|
|
|
* the innermost continue branch. So unless the array is greater than
|
|
|
|
* the trip count any iteration over the loop will be an out of bounds
|
|
|
|
* access of the array.
|
|
|
|
*/
|
2020-08-17 13:19:43 +01:00
|
|
|
unsigned length = glsl_type_is_vector(parent->type) ?
|
|
|
|
glsl_get_vector_elements(parent->type) :
|
|
|
|
glsl_get_length(parent->type);
|
|
|
|
return length <= trip_count;
|
2018-11-20 03:05:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we know an array access is going to be out of bounds remove or replace
|
|
|
|
* the access with an undef. This can later result in the entire loop being
|
|
|
|
* removed by nir_opt_dead_cf().
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
|
|
|
|
nir_loop_terminator *term,
|
|
|
|
nir_cf_list *lp_header,
|
|
|
|
nir_cf_list *lp_body,
|
|
|
|
unsigned trip_count)
|
|
|
|
{
|
|
|
|
if (!loop->info->guessed_trip_count)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Temporarily recreate the original loop so we can alter it */
|
|
|
|
nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
|
|
|
|
nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
|
|
|
|
|
|
|
|
nir_builder b;
|
|
|
|
nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
|
|
|
|
|
|
|
|
nir_foreach_block_in_cf_node(block, &loop->cf_node) {
|
|
|
|
nir_foreach_instr_safe(instr, block) {
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
|
|
|
/* Check for arrays variably-indexed by a loop induction variable.
|
|
|
|
* If this access is out of bounds remove the instruction or replace
|
|
|
|
* its use with an undefined instruction.
|
|
|
|
* If the loop is no longer useful we leave it for the appropriate
|
|
|
|
* pass to clean it up for us.
|
|
|
|
*/
|
|
|
|
if (intrin->intrinsic == nir_intrinsic_load_deref ||
|
|
|
|
intrin->intrinsic == nir_intrinsic_store_deref ||
|
|
|
|
intrin->intrinsic == nir_intrinsic_copy_deref) {
|
|
|
|
|
|
|
|
if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
|
|
|
|
trip_count)) {
|
|
|
|
if (intrin->intrinsic == nir_intrinsic_load_deref) {
|
|
|
|
nir_ssa_def *undef =
|
2019-03-12 21:25:39 +00:00
|
|
|
nir_ssa_undef(&b, intrin->dest.ssa.num_components,
|
|
|
|
intrin->dest.ssa.bit_size);
|
2018-11-20 03:05:09 +00:00
|
|
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
|
2021-03-03 06:13:38 +00:00
|
|
|
undef);
|
2018-11-20 03:05:09 +00:00
|
|
|
} else {
|
|
|
|
nir_instr_remove(instr);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (intrin->intrinsic == nir_intrinsic_copy_deref &&
|
|
|
|
is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
|
|
|
|
trip_count)) {
|
|
|
|
nir_instr_remove(instr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now that we are done extract the loop header and body again */
|
|
|
|
nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
|
|
|
|
nir_before_cf_node(&term->nif->cf_node));
|
|
|
|
nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
|
|
|
|
nir_after_block(nir_loop_last_block(loop)));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Partially unrolls loops that don't have a known trip count.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
|
|
|
|
{
|
|
|
|
assert(list_length(&loop->info->loop_terminator_list) == 1);
|
|
|
|
|
|
|
|
nir_loop_terminator *terminator =
|
|
|
|
list_first_entry(&loop->info->loop_terminator_list,
|
|
|
|
nir_loop_terminator, loop_terminator_link);
|
|
|
|
|
|
|
|
assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
|
|
|
|
|
|
|
|
loop_prepare_for_unroll(loop);
|
|
|
|
|
|
|
|
/* Pluck out the loop header */
|
|
|
|
nir_cf_list lp_header;
|
|
|
|
nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
|
|
|
|
nir_before_cf_node(&terminator->nif->cf_node));
|
|
|
|
|
|
|
|
struct hash_table *remap_table =
|
|
|
|
_mesa_hash_table_create(NULL, _mesa_hash_pointer,
|
|
|
|
_mesa_key_pointer_equal);
|
|
|
|
|
|
|
|
nir_cf_list lp_body;
|
|
|
|
nir_cf_node *unroll_loc =
|
|
|
|
complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
|
|
|
|
remap_table, trip_count);
|
|
|
|
|
|
|
|
/* Attempt to remove out of bounds array access */
|
|
|
|
remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
|
|
|
|
&lp_body, trip_count);
|
|
|
|
|
|
|
|
nir_cursor cursor =
|
|
|
|
get_complex_unroll_insert_location(unroll_loc,
|
|
|
|
terminator->continue_from_then);
|
|
|
|
|
|
|
|
/* Reinsert the loop in the innermost nested continue branch of the unrolled
|
|
|
|
* loop.
|
|
|
|
*/
|
|
|
|
nir_loop *new_loop = nir_loop_create(shader);
|
|
|
|
nir_cf_node_insert(cursor, &new_loop->cf_node);
|
|
|
|
new_loop->partially_unrolled = true;
|
|
|
|
|
|
|
|
/* Clone loop header and insert into new loop */
|
|
|
|
nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
|
|
|
|
nir_after_cf_list(&new_loop->body),
|
|
|
|
remap_table);
|
|
|
|
|
|
|
|
/* Clone loop body and insert into new loop */
|
|
|
|
nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
|
|
|
|
nir_after_cf_list(&new_loop->body),
|
|
|
|
remap_table);
|
|
|
|
|
|
|
|
/* Insert break back into terminator */
|
|
|
|
nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
|
|
|
|
nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
|
|
|
|
if (terminator->continue_from_then) {
|
|
|
|
nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
|
|
|
|
} else {
|
|
|
|
nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Delete the original loop header and body */
|
|
|
|
nir_cf_delete(&lp_header);
|
|
|
|
nir_cf_delete(&lp_body);
|
|
|
|
|
|
|
|
/* The original loop has been replaced so remove it. */
|
|
|
|
nir_cf_node_remove(&loop->cf_node);
|
|
|
|
|
|
|
|
_mesa_hash_table_destroy(remap_table, NULL);
|
|
|
|
}
|
|
|
|
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
static bool
|
|
|
|
is_indirect_load(nir_instr *instr)
|
|
|
|
{
|
|
|
|
if (instr->type == nir_instr_type_intrinsic) {
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
|
|
|
if ((intrin->intrinsic == nir_intrinsic_load_ubo ||
|
2021-04-12 15:36:59 +01:00
|
|
|
intrin->intrinsic == nir_intrinsic_load_ssbo) &&
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
!nir_src_is_const(intrin->src[1])) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-04-12 15:36:59 +01:00
|
|
|
if (intrin->intrinsic == nir_intrinsic_load_global)
|
|
|
|
return true;
|
|
|
|
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
if (intrin->intrinsic == nir_intrinsic_load_deref ||
|
|
|
|
intrin->intrinsic == nir_intrinsic_store_deref) {
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
|
|
|
nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global;
|
|
|
|
if (!nir_deref_mode_may_be(deref, mem_modes))
|
|
|
|
return false;
|
|
|
|
while (deref) {
|
|
|
|
if ((deref->deref_type == nir_deref_type_array ||
|
|
|
|
deref->deref_type == nir_deref_type_ptr_as_array) &&
|
|
|
|
!nir_src_is_const(deref->arr.index)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
deref = nir_deref_instr_parent(deref);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (instr->type == nir_instr_type_tex) {
|
|
|
|
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
|
|
|
if (!nir_src_is_const(tex->src[i].src))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
can_pipeline_loads(nir_loop *loop)
|
|
|
|
{
|
|
|
|
if (!loop->info->exact_trip_count_known)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
bool interesting_loads = false;
|
|
|
|
|
|
|
|
foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
|
|
|
|
if (cf_node == &loop->info->limiting_terminator->nif->cf_node)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Control flow usually prevents useful scheduling */
|
|
|
|
if (cf_node->type != nir_cf_node_block)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (interesting_loads)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_block *block = nir_cf_node_as_block(cf_node);
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
|
|
if (is_indirect_load(instr)) {
|
|
|
|
interesting_loads = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return interesting_loads;
|
|
|
|
}
|
|
|
|
|
2019-03-20 02:39:36 +00:00
|
|
|
/*
|
|
|
|
* Returns true if we should unroll the loop, otherwise false.
|
|
|
|
*/
|
2016-09-15 06:49:57 +01:00
|
|
|
static bool
|
2019-03-20 02:39:36 +00:00
|
|
|
check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
|
2016-09-15 06:49:57 +01:00
|
|
|
{
|
2019-03-20 02:39:36 +00:00
|
|
|
if (loop->control == nir_loop_control_unroll)
|
|
|
|
return true;
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2019-03-20 02:39:36 +00:00
|
|
|
if (loop->control == nir_loop_control_dont_unroll)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
nir_loop_info *li = loop->info;
|
|
|
|
unsigned max_iter = shader->options->max_unroll_iterations;
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
/* Unroll much more aggressively if it can hide load latency. */
|
|
|
|
if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop))
|
|
|
|
max_iter = shader->options->max_unroll_iterations_aggressive;
|
2018-11-20 03:05:09 +00:00
|
|
|
unsigned trip_count =
|
|
|
|
li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
|
|
|
|
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter)
|
2016-09-15 06:49:57 +01:00
|
|
|
return true;
|
|
|
|
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT;
|
|
|
|
unsigned cost = li->instr_cost * trip_count;
|
|
|
|
|
|
|
|
if (cost <= cost_limit && trip_count <= max_iter)
|
|
|
|
return true;
|
2016-09-15 06:49:57 +01:00
|
|
|
|
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems
to increase FPS at the very start of the game by ~5% (RX 580, 1080p,
medium settings, no MSAA).
fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
2020-09-01 11:55:58 +01:00
|
|
|
return false;
|
2016-09-15 06:49:57 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
2020-03-13 14:06:07 +00:00
|
|
|
process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
|
|
|
|
bool *unrolled_this_block);
|
|
|
|
|
|
|
|
static bool
|
|
|
|
process_loops_in_block(nir_shader *sh, struct exec_list *block,
|
|
|
|
bool *has_nested_loop_out)
|
|
|
|
{
|
|
|
|
/* We try to unroll as many loops in one pass as possible.
|
|
|
|
* E.g. we can safely unroll both loops in this block:
|
|
|
|
*
|
|
|
|
* if (...) {
|
|
|
|
* loop {...}
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* if (...) {
|
|
|
|
* loop {...}
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* Unrolling one loop doesn't affect the other one.
|
|
|
|
*
|
|
|
|
* On the other hand for block with:
|
|
|
|
*
|
|
|
|
* loop {...}
|
|
|
|
* ...
|
|
|
|
* loop {...}
|
|
|
|
*
|
|
|
|
* It is unsafe to unroll both loops in one pass without taking
|
|
|
|
* complicating precautions, since the structure of the block would
|
|
|
|
* change after unrolling the first loop. So in such a case we leave
|
|
|
|
* the second loop for the next iteration of unrolling to handle.
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool progress = false;
|
|
|
|
bool unrolled_this_block = false;
|
|
|
|
|
|
|
|
foreach_list_typed(nir_cf_node, nested_node, node, block) {
|
|
|
|
if (process_loops(sh, nested_node,
|
|
|
|
has_nested_loop_out, &unrolled_this_block)) {
|
|
|
|
progress = true;
|
|
|
|
|
|
|
|
/* If current node is unrolled we could not safely continue
|
|
|
|
* our iteration since we don't know the next node
|
|
|
|
* and it's hard to guarantee that we won't end up unrolling
|
|
|
|
* inner loop of the currently unrolled one, if such exists.
|
|
|
|
*/
|
|
|
|
if (unrolled_this_block) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
|
|
|
|
bool *unrolled_this_block)
|
2016-09-15 06:49:57 +01:00
|
|
|
{
|
|
|
|
bool progress = false;
|
2018-07-23 09:02:28 +01:00
|
|
|
bool has_nested_loop = false;
|
2016-09-15 06:49:57 +01:00
|
|
|
nir_loop *loop;
|
|
|
|
|
|
|
|
switch (cf_node->type) {
|
|
|
|
case nir_cf_node_block:
|
|
|
|
return progress;
|
|
|
|
case nir_cf_node_if: {
|
|
|
|
nir_if *if_stmt = nir_cf_node_as_if(cf_node);
|
2020-03-13 14:06:07 +00:00
|
|
|
progress |= process_loops_in_block(sh, &if_stmt->then_list,
|
|
|
|
has_nested_loop_out);
|
|
|
|
progress |= process_loops_in_block(sh, &if_stmt->else_list,
|
|
|
|
has_nested_loop_out);
|
2016-09-15 06:49:57 +01:00
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
case nir_cf_node_loop: {
|
|
|
|
loop = nir_cf_node_as_loop(cf_node);
|
2020-03-30 22:39:01 +01:00
|
|
|
progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop);
|
2018-07-23 09:02:28 +01:00
|
|
|
|
2016-09-15 06:49:57 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
unreachable("unknown cf node type");
|
|
|
|
}
|
|
|
|
|
2020-03-13 14:06:07 +00:00
|
|
|
const bool unrolled_child_block = progress;
|
|
|
|
|
2018-07-23 09:02:28 +01:00
|
|
|
/* Don't attempt to unroll a second inner loop in this pass, wait until the
|
|
|
|
* next pass as we have altered the cf.
|
|
|
|
*/
|
2019-03-20 02:39:36 +00:00
|
|
|
if (!progress && loop->control != nir_loop_control_dont_unroll) {
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2018-07-07 08:56:26 +01:00
|
|
|
/* Check for the classic
|
|
|
|
*
|
|
|
|
* do {
|
|
|
|
* // ...
|
|
|
|
* } while (false)
|
|
|
|
*
|
|
|
|
* that is used to wrap multi-line macros. GLSL IR also wraps switch
|
|
|
|
* statements in a loop like this.
|
|
|
|
*/
|
|
|
|
if (loop->info->limiting_terminator == NULL &&
|
|
|
|
!loop->info->complex_loop) {
|
|
|
|
|
2018-07-11 01:53:41 +01:00
|
|
|
nir_block *last_loop_blk = nir_loop_last_block(loop);
|
2018-11-20 03:05:09 +00:00
|
|
|
if (nir_block_ends_in_break(last_loop_blk)) {
|
|
|
|
progress = wrapper_unroll(loop);
|
2018-07-11 01:53:41 +01:00
|
|
|
goto exit;
|
2018-11-20 03:05:09 +00:00
|
|
|
}
|
2018-07-11 01:53:41 +01:00
|
|
|
|
2018-11-20 03:05:09 +00:00
|
|
|
/* If we were able to guess the loop iteration based on array access
|
|
|
|
* then do a partial unroll.
|
|
|
|
*/
|
|
|
|
unsigned num_lt = list_length(&loop->info->loop_terminator_list);
|
|
|
|
if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
|
|
|
|
loop->info->guessed_trip_count &&
|
2019-03-20 02:39:36 +00:00
|
|
|
check_unrolling_restrictions(sh, loop)) {
|
2018-11-20 03:05:09 +00:00
|
|
|
partial_unroll(sh, loop, loop->info->guessed_trip_count);
|
|
|
|
progress = true;
|
|
|
|
}
|
2018-07-07 08:56:26 +01:00
|
|
|
}
|
|
|
|
|
2018-11-20 03:05:09 +00:00
|
|
|
if (has_nested_loop || !loop->info->limiting_terminator)
|
2018-07-23 09:02:28 +01:00
|
|
|
goto exit;
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2019-03-20 02:39:36 +00:00
|
|
|
if (!check_unrolling_restrictions(sh, loop))
|
2018-07-23 09:02:28 +01:00
|
|
|
goto exit;
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2018-11-20 00:35:37 +00:00
|
|
|
if (loop->info->exact_trip_count_known) {
|
2016-09-15 06:49:57 +01:00
|
|
|
simple_unroll(loop);
|
|
|
|
progress = true;
|
|
|
|
} else {
|
|
|
|
/* Attempt to unroll loops with two terminators. */
|
|
|
|
unsigned num_lt = list_length(&loop->info->loop_terminator_list);
|
2018-11-20 02:45:58 +00:00
|
|
|
if (num_lt == 2 &&
|
|
|
|
!loop->info->limiting_terminator->exact_trip_count_unknown) {
|
2016-09-15 06:49:57 +01:00
|
|
|
bool limiting_term_second = true;
|
|
|
|
nir_loop_terminator *terminator =
|
2018-06-04 07:26:46 +01:00
|
|
|
list_first_entry(&loop->info->loop_terminator_list,
|
2016-09-15 06:49:57 +01:00
|
|
|
nir_loop_terminator, loop_terminator_link);
|
|
|
|
|
|
|
|
|
|
|
|
if (terminator->nif == loop->info->limiting_terminator->nif) {
|
|
|
|
limiting_term_second = false;
|
|
|
|
terminator =
|
2018-06-04 07:26:46 +01:00
|
|
|
list_last_entry(&loop->info->loop_terminator_list,
|
2016-09-15 06:49:57 +01:00
|
|
|
nir_loop_terminator, loop_terminator_link);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If the first terminator has a trip count of zero and is the
|
|
|
|
* limiting terminator just do a simple unroll as the second
|
|
|
|
* terminator can never be reached.
|
|
|
|
*/
|
2018-11-20 00:35:37 +00:00
|
|
|
if (loop->info->max_trip_count == 0 && !limiting_term_second) {
|
2016-09-15 06:49:57 +01:00
|
|
|
simple_unroll(loop);
|
|
|
|
} else {
|
|
|
|
complex_unroll(loop, terminator, limiting_term_second);
|
|
|
|
}
|
|
|
|
progress = true;
|
|
|
|
}
|
2018-11-20 04:23:45 +00:00
|
|
|
|
|
|
|
if (num_lt == 1) {
|
|
|
|
assert(loop->info->limiting_terminator->exact_trip_count_unknown);
|
|
|
|
complex_unroll_single_terminator(loop);
|
|
|
|
progress = true;
|
|
|
|
}
|
2016-09-15 06:49:57 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-23 09:02:28 +01:00
|
|
|
exit:
|
|
|
|
*has_nested_loop_out = true;
|
2020-03-13 14:06:07 +00:00
|
|
|
if (progress && !unrolled_child_block)
|
|
|
|
*unrolled_this_block = true;
|
|
|
|
|
2016-09-15 06:49:57 +01:00
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
nir_opt_loop_unroll_impl(nir_function_impl *impl,
|
|
|
|
nir_variable_mode indirect_mask)
|
|
|
|
{
|
|
|
|
bool progress = false;
|
|
|
|
nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
|
|
|
|
nir_metadata_require(impl, nir_metadata_block_index);
|
|
|
|
|
2020-03-13 14:06:07 +00:00
|
|
|
bool has_nested_loop = false;
|
|
|
|
progress |= process_loops_in_block(impl->function->shader, &impl->body,
|
|
|
|
&has_nested_loop);
|
2016-09-15 06:49:57 +01:00
|
|
|
|
2020-05-22 04:34:37 +01:00
|
|
|
if (progress) {
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_none);
|
2016-12-14 02:19:42 +00:00
|
|
|
nir_lower_regs_to_ssa_impl(impl);
|
2020-05-22 04:34:37 +01:00
|
|
|
} else {
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_all);
|
|
|
|
}
|
2016-09-15 06:49:57 +01:00
|
|
|
|
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
|
2018-06-15 21:12:23 +01:00
|
|
|
/**
|
|
|
|
* indirect_mask specifies which type of indirectly accessed variables
|
|
|
|
* should force loop unrolling.
|
|
|
|
*/
|
2016-09-15 06:49:57 +01:00
|
|
|
bool
|
|
|
|
nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
|
|
|
|
{
|
|
|
|
bool progress = false;
|
|
|
|
|
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
if (function->impl) {
|
|
|
|
progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return progress;
|
|
|
|
}
|