ac: move ac_lower_indirect_derefs() outside of the LLVM dir
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10403>
This commit is contained in:
parent
0b7c442d39
commit
1c702a8239
|
@ -44,6 +44,7 @@ AMD_COMMON_FILES = \
|
||||||
common/ac_gpu_info.h \
|
common/ac_gpu_info.h \
|
||||||
common/ac_msgpack.c \
|
common/ac_msgpack.c \
|
||||||
common/ac_msgpack.h \
|
common/ac_msgpack.h \
|
||||||
|
common/ac_nir.c \
|
||||||
common/ac_nir.h \
|
common/ac_nir.h \
|
||||||
common/ac_nir_lower_esgs_io_to_mem.c \
|
common/ac_nir_lower_esgs_io_to_mem.c \
|
||||||
common/ac_nir_lower_tess_io_to_mem.c \
|
common/ac_nir_lower_tess_io_to_mem.c \
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2016 Bas Nieuwenhuizen
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ac_nir.h"
|
||||||
|
|
||||||
|
/**
 * Lower indirect variable dereferences in a NIR shader for AMD hardware.
 *
 * Works in two steps:
 *  1. nir_lower_vars_to_scratch is run on nir_var_function_temp variables
 *     with a threshold of 256 and glsl_get_natural_size_align_bytes as the
 *     size/align callback.
 *  2. nir_lower_indirect_derefs is run with a mode mask that always contains
 *     nir_var_function_temp, contains nir_var_shader_in for geometry shaders
 *     (and, on GFX9, for every stage other than TCS/TES), and contains
 *     nir_var_shader_out on GFX9 for every stage other than TCS.
 *
 * NOTE(review): the 256 threshold is presumably in bytes (the callback is the
 * byte-based one), and UINT32_MAX presumably means "no array-length limit" --
 * confirm against the nir_lower_vars_to_scratch / nir_lower_indirect_derefs
 * declarations.
 *
 * \param shader      shader to lower; its info.stage selects the mode mask.
 * \param chip_class  target chip generation; only compared against GFX9.
 * \return true if either lowering pass reported progress.
 */
bool
|
||||||
|
ac_nir_lower_indirect_derefs(nir_shader *shader,
|
||||||
|
enum chip_class chip_class)
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
/* Lower large variables to scratch first so that we won't bloat the
|
||||||
|
* shader by generating large if ladders for them. We later lower
|
||||||
|
* scratch to alloca's, assuming LLVM won't generate VGPR indexing.
|
||||||
|
*/
|
||||||
|
NIR_PASS(progress, shader, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
|
||||||
|
glsl_get_natural_size_align_bytes);
|
||||||
|
|
||||||
|
/* LLVM doesn't support VGPR indexing on GFX9. */
|
||||||
|
bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
|
||||||
|
|
||||||
|
/* TODO: Indirect indexing of GS inputs is unimplemented.
|
||||||
|
*
|
||||||
|
* TCS and TES load inputs directly from LDS or offchip memory, so
|
||||||
|
* indirect indexing is trivial.
|
||||||
|
*/
|
||||||
|
nir_variable_mode indirect_mask = 0;
|
||||||
|
if (shader->info.stage == MESA_SHADER_GEOMETRY ||
|
||||||
|
(shader->info.stage != MESA_SHADER_TESS_CTRL && shader->info.stage != MESA_SHADER_TESS_EVAL &&
|
||||||
|
!llvm_has_working_vgpr_indexing)) {
|
||||||
|
indirect_mask |= nir_var_shader_in;
|
||||||
|
}
|
||||||
|
if (!llvm_has_working_vgpr_indexing && shader->info.stage != MESA_SHADER_TESS_CTRL)
|
||||||
|
indirect_mask |= nir_var_shader_out;
|
||||||
|
|
||||||
|
/* TODO: We shouldn't need to do this, however LLVM isn't currently
|
||||||
|
* smart enough to handle indirects without causing excess spilling,
|
||||||
|
* which causes the GPU to hang.
|
||||||
|
*
|
||||||
|
* See the following thread for more details of the problem:
|
||||||
|
* https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
|
||||||
|
*/
|
||||||
|
indirect_mask |= nir_var_function_temp;
|
||||||
|
|
||||||
|
progress |= nir_lower_indirect_derefs(shader, indirect_mask, UINT32_MAX);
|
||||||
|
return progress;
|
||||||
|
}
|
|
@ -82,6 +82,10 @@ ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
|
||||||
enum chip_class chip_class,
|
enum chip_class chip_class,
|
||||||
unsigned num_reserved_es_outputs);
|
unsigned num_reserved_es_outputs);
|
||||||
|
|
||||||
|
/* Lowers large function-temp variables to scratch, then lowers indirect
 * derefs for the variable modes selected from the shader stage and
 * chip_class (only GFX9 is special-cased). Returns true if any pass made
 * progress.
 */
bool
|
||||||
|
ac_nir_lower_indirect_derefs(nir_shader *shader,
|
||||||
|
enum chip_class chip_class);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -87,6 +87,7 @@ amd_common_files = files(
|
||||||
'ac_msgpack.c',
|
'ac_msgpack.c',
|
||||||
'ac_msgpack.h',
|
'ac_msgpack.h',
|
||||||
'ac_rgp_elf_object_pack.c',
|
'ac_rgp_elf_object_pack.c',
|
||||||
|
'ac_nir.c',
|
||||||
'ac_nir.h',
|
'ac_nir.h',
|
||||||
'ac_nir_lower_esgs_io_to_mem.c',
|
'ac_nir_lower_esgs_io_to_mem.c',
|
||||||
'ac_nir_lower_tess_io_to_mem.c',
|
'ac_nir_lower_tess_io_to_mem.c',
|
||||||
|
|
|
@ -5127,47 +5127,6 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
|
||||||
ralloc_free(ctx.verified_interp);
|
ralloc_free(ctx.verified_interp);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
|
|
||||||
{
|
|
||||||
bool progress = false;
|
|
||||||
|
|
||||||
/* Lower large variables to scratch first so that we won't bloat the
|
|
||||||
* shader by generating large if ladders for them. We later lower
|
|
||||||
* scratch to alloca's, assuming LLVM won't generate VGPR indexing.
|
|
||||||
*/
|
|
||||||
NIR_PASS(progress, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
|
|
||||||
glsl_get_natural_size_align_bytes);
|
|
||||||
|
|
||||||
/* LLVM doesn't support VGPR indexing on GFX9. */
|
|
||||||
bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
|
|
||||||
|
|
||||||
/* TODO: Indirect indexing of GS inputs is unimplemented.
|
|
||||||
*
|
|
||||||
* TCS and TES load inputs directly from LDS or offchip memory, so
|
|
||||||
* indirect indexing is trivial.
|
|
||||||
*/
|
|
||||||
nir_variable_mode indirect_mask = 0;
|
|
||||||
if (nir->info.stage == MESA_SHADER_GEOMETRY ||
|
|
||||||
(nir->info.stage != MESA_SHADER_TESS_CTRL && nir->info.stage != MESA_SHADER_TESS_EVAL &&
|
|
||||||
!llvm_has_working_vgpr_indexing)) {
|
|
||||||
indirect_mask |= nir_var_shader_in;
|
|
||||||
}
|
|
||||||
if (!llvm_has_working_vgpr_indexing && nir->info.stage != MESA_SHADER_TESS_CTRL)
|
|
||||||
indirect_mask |= nir_var_shader_out;
|
|
||||||
|
|
||||||
/* TODO: We shouldn't need to do this, however LLVM isn't currently
|
|
||||||
* smart enough to handle indirects without causing excess spilling
|
|
||||||
* causing the gpu to hang.
|
|
||||||
*
|
|
||||||
* See the following thread for more details of the problem:
|
|
||||||
* https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
|
|
||||||
*/
|
|
||||||
indirect_mask |= nir_var_function_temp;
|
|
||||||
|
|
||||||
progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
|
|
||||||
return progress;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
|
static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
|
||||||
{
|
{
|
||||||
if (intrin->intrinsic != nir_intrinsic_store_output)
|
if (intrin->intrinsic != nir_intrinsic_store_output)
|
||||||
|
|
|
@ -47,8 +47,6 @@ static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
|
||||||
return (index * 4) + chan;
|
return (index * 4) + chan;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
|
|
||||||
|
|
||||||
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
|
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
|
||||||
|
|
||||||
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
|
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
|
||||||
|
|
|
@ -43,6 +43,7 @@
|
||||||
#include "ac_exp_param.h"
|
#include "ac_exp_param.h"
|
||||||
#include "ac_llvm_util.h"
|
#include "ac_llvm_util.h"
|
||||||
#include "ac_nir_to_llvm.h"
|
#include "ac_nir_to_llvm.h"
|
||||||
|
#include "ac_nir.h"
|
||||||
#include "ac_shader_util.h"
|
#include "ac_shader_util.h"
|
||||||
#include "aco_interface.h"
|
#include "aco_interface.h"
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
|
@ -2339,16 +2340,16 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders,
|
||||||
|
|
||||||
if (progress) {
|
if (progress) {
|
||||||
if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
|
if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
|
||||||
ac_lower_indirect_derefs(ordered_shaders[i],
|
ac_nir_lower_indirect_derefs(ordered_shaders[i],
|
||||||
pipeline->device->physical_device->rad_info.chip_class);
|
pipeline->device->physical_device->rad_info.chip_class);
|
||||||
/* remove dead writes, which can remove input loads */
|
/* remove dead writes, which can remove input loads */
|
||||||
nir_lower_vars_to_ssa(ordered_shaders[i]);
|
nir_lower_vars_to_ssa(ordered_shaders[i]);
|
||||||
nir_opt_dce(ordered_shaders[i]);
|
nir_opt_dce(ordered_shaders[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
|
if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
|
||||||
ac_lower_indirect_derefs(ordered_shaders[i - 1],
|
ac_nir_lower_indirect_derefs(ordered_shaders[i - 1],
|
||||||
pipeline->device->physical_device->rad_info.chip_class);
|
pipeline->device->physical_device->rad_info.chip_class);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -644,7 +644,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
|
||||||
* bloat the instruction count of the loop and cause it to be
|
* bloat the instruction count of the loop and cause it to be
|
||||||
* considered too large for unrolling.
|
* considered too large for unrolling.
|
||||||
*/
|
*/
|
||||||
if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
|
if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
|
||||||
!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
|
!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
|
||||||
nir->info.stage != MESA_SHADER_COMPUTE) {
|
nir->info.stage != MESA_SHADER_COMPUTE) {
|
||||||
/* Optimize the lowered code before the linking optimizations. */
|
/* Optimize the lowered code before the linking optimizations. */
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "ac_nir_to_llvm.h"
|
#include "ac_nir_to_llvm.h"
|
||||||
|
#include "ac_nir.h"
|
||||||
#include "compiler/nir/nir.h"
|
#include "compiler/nir/nir.h"
|
||||||
#include "compiler/nir/nir_builder.h"
|
#include "compiler/nir/nir_builder.h"
|
||||||
#include "compiler/nir/nir_deref.h"
|
#include "compiler/nir/nir_deref.h"
|
||||||
|
@ -871,7 +872,7 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
||||||
NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
|
NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
|
changed |= ac_nir_lower_indirect_derefs(nir, sscreen->info.chip_class);
|
||||||
if (changed)
|
if (changed)
|
||||||
si_nir_opts(sscreen, nir, false);
|
si_nir_opts(sscreen, nir, false);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue