diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 985f07c7d3c..968aa78ade3 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3578,6 +3578,11 @@ typedef struct nir_shader_compiler_options { * vectorized IO can pack more varyings when linking. */ bool linker_ignore_precision; + /* Specifies if indirect sampler array access will trigger forced loop + * unrolling. + */ + bool force_indirect_unrolling_sampler; + /** * Specifies which type of indirectly accessed variables should force * loop unrolling. @@ -5312,7 +5317,8 @@ void nir_live_ssa_defs_impl(nir_function_impl *impl); const BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx); void nir_loop_analyze_impl(nir_function_impl *impl, - nir_variable_mode indirect_mask); + nir_variable_mode indirect_mask, + bool force_unroll_sampler_indirect); bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); diff --git a/src/compiler/nir/nir_inline_uniforms.c b/src/compiler/nir/nir_inline_uniforms.c index 652b9534691..57c29fc07bd 100644 --- a/src/compiler/nir/nir_inline_uniforms.c +++ b/src/compiler/nir/nir_inline_uniforms.c @@ -321,7 +321,8 @@ nir_find_inlinable_uniforms(nir_shader *shader) nir_foreach_function(function, shader) { if (function->impl) { - nir_metadata_require(function->impl, nir_metadata_loop_analysis, nir_var_all); + nir_metadata_require(function->impl, nir_metadata_loop_analysis, + nir_var_all, false); foreach_list_typed(nir_cf_node, node, node, &function->impl->body) process_node(node, NULL, uni_offsets, &num_offsets); diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index c35a129d428..3b54a068429 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -77,6 +77,7 @@ typedef struct { nir_variable_mode indirect_mask; + bool force_unroll_sampler_indirect; } loop_info_state; static nir_loop_variable * @@ -601,6 +602,7 @@ find_array_access_via_induction(loop_info_state *state, 
*array_index_out = array_index; nir_deref_instr *parent = nir_deref_instr_parent(d); + if (glsl_type_is_array_or_matrix(parent->type)) { return glsl_get_length(parent->type); } else { @@ -1208,7 +1210,8 @@ find_trip_count(loop_info_state *state, unsigned execution_mode) } static bool -force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref) +force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref, + bool contains_sampler) { unsigned array_size = find_array_access_via_induction(state, deref, NULL); if (array_size) { @@ -1221,6 +1224,9 @@ force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref) if (nir_deref_mode_must_be(deref, state->indirect_mask)) return true; + + if (contains_sampler && state->force_unroll_sampler_indirect) + return true; } return false; @@ -1230,6 +1236,22 @@ static bool force_unroll_heuristics(loop_info_state *state, nir_block *block) { nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + int sampler_idx = + nir_tex_instr_src_index(tex_instr, + nir_tex_src_sampler_deref); + + + if (sampler_idx >= 0) { + nir_deref_instr *deref = + nir_instr_as_deref(tex_instr->src[sampler_idx].src.ssa->parent_instr); + if (force_unroll_array_access(state, deref, true)) + return true; + } + } + + if (instr->type != nir_instr_type_intrinsic) continue; @@ -1242,12 +1264,14 @@ force_unroll_heuristics(loop_info_state *state, nir_block *block) intrin->intrinsic == nir_intrinsic_store_deref || intrin->intrinsic == nir_intrinsic_copy_deref) { if (force_unroll_array_access(state, - nir_src_as_deref(intrin->src[0]))) + nir_src_as_deref(intrin->src[0]), + false)) return true; if (intrin->intrinsic == nir_intrinsic_copy_deref && force_unroll_array_access(state, - nir_src_as_deref(intrin->src[1]))) + nir_src_as_deref(intrin->src[1]), + false)) return true; } } @@ -1343,7 +1367,8 @@ initialize_loop_info_state(nir_loop *loop, void *mem_ctx, } 
static void -process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask) +process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask, + bool force_unroll_sampler_indirect) { switch (cf_node->type) { case nir_cf_node_block: @@ -1351,15 +1376,15 @@ process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask) case nir_cf_node_if: { nir_if *if_stmt = nir_cf_node_as_if(cf_node); foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) - process_loops(nested_node, indirect_mask); + process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect); foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) - process_loops(nested_node, indirect_mask); + process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect); return; } case nir_cf_node_loop: { nir_loop *loop = nir_cf_node_as_loop(cf_node); foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) - process_loops(nested_node, indirect_mask); + process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect); break; } default: @@ -1372,6 +1397,7 @@ process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask) loop_info_state *state = initialize_loop_info_state(loop, mem_ctx, impl); state->indirect_mask = indirect_mask; + state->force_unroll_sampler_indirect = force_unroll_sampler_indirect; get_loop_info(state, impl); @@ -1380,9 +1406,10 @@ process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask) void nir_loop_analyze_impl(nir_function_impl *impl, - nir_variable_mode indirect_mask) + nir_variable_mode indirect_mask, + bool force_unroll_sampler_indirect) { nir_index_ssa_defs(impl); foreach_list_typed(nir_cf_node, node, node, &impl->body) - process_loops(node, indirect_mask); + process_loops(node, indirect_mask, force_unroll_sampler_indirect); } diff --git a/src/compiler/nir/nir_metadata.c b/src/compiler/nir/nir_metadata.c index 0cc8f0624d0..4c47ef1efb4 100644 --- a/src/compiler/nir/nir_metadata.c +++ 
b/src/compiler/nir/nir_metadata.c @@ -46,7 +46,13 @@ nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...) if (NEEDS_UPDATE(nir_metadata_loop_analysis)) { va_list ap; va_start(ap, required); - nir_loop_analyze_impl(impl, va_arg(ap, nir_variable_mode)); + /* !! Warning !! Do not move these va_arg() calls directly into the + * nir_loop_analyze_impl() argument list: the order in which function + * arguments are evaluated is unspecified. + */ + nir_variable_mode mode = va_arg(ap, nir_variable_mode); + int force_unroll_sampler_indirect = va_arg(ap, int); + nir_loop_analyze_impl(impl, mode, force_unroll_sampler_indirect); va_end(ap); } diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c index 529ca6439e5..694ee641e42 100644 --- a/src/compiler/nir/nir_opt_gcm.c +++ b/src/compiler/nir/nir_opt_gcm.c @@ -735,7 +735,8 @@ opt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number) nir_metadata_require(impl, nir_metadata_block_index | nir_metadata_dominance); nir_metadata_require(impl, nir_metadata_loop_analysis, - shader->options->force_indirect_unrolling); + shader->options->force_indirect_unrolling, + shader->options->force_indirect_unrolling_sampler); /* A previous pass may have left pass_flags dirty, so clear it all out. 
*/ nir_foreach_block(block, impl) diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index 49c696dc514..d101d74c5d7 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -1090,10 +1090,12 @@ exit: static bool nir_opt_loop_unroll_impl(nir_function_impl *impl, - nir_variable_mode indirect_mask) + nir_variable_mode indirect_mask, + bool force_unroll_sampler_indirect) { bool progress = false; - nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask); + nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask, + (int) force_unroll_sampler_indirect); nir_metadata_require(impl, nir_metadata_block_index); bool has_nested_loop = false; @@ -1119,10 +1121,12 @@ nir_opt_loop_unroll(nir_shader *shader) { bool progress = false; + bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler; nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling; nir_foreach_function(function, shader) { if (function->impl) { - progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask); + progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask, + force_unroll_sampler_indirect); } } return progress;