ir3: Make nir compiler options a part of ir3_compiler
This allows GPU sub-generations (sub-gens) to have different NIR compiler options. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13986>
This commit is contained in:
parent
b8d486f298
commit
e1f89a1da2
|
@ -69,6 +69,121 @@ ir3_compiler_destroy(struct ir3_compiler *compiler)
|
|||
ralloc_free(compiler);
|
||||
}
|
||||
|
||||
/* Baseline NIR compiler options. ir3_compiler_create() copies this table
 * into compiler->nir_options when compiler->gen is below 6. Unlike
 * options_a6xx, it sets vertex_id_zero_based = true.
 */
static const nir_shader_compiler_options options = {
|
||||
.lower_fpow = true,
|
||||
.lower_scmp = true,
|
||||
.lower_flrp16 = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_ffract = true,
|
||||
.lower_fmod = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_isign = true,
|
||||
.lower_ldexp = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_usub_borrow = true,
|
||||
.lower_mul_high = true,
|
||||
.lower_mul_2x32_64 = true,
|
||||
.fuse_ffma16 = true,
|
||||
.fuse_ffma32 = true,
|
||||
.fuse_ffma64 = true,
|
||||
.vertex_id_zero_based = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_unpack_snorm_2x16 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_unpack_unorm_2x16 = true,
|
||||
.lower_pack_split = true,
|
||||
.use_interpolated_input_intrinsics = true,
|
||||
.lower_rotate = true,
|
||||
.lower_to_scalar = true,
|
||||
.has_imul24 = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.lower_wpos_pntc = true,
|
||||
.lower_cs_local_index_from_id = true,
|
||||
|
||||
/* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
|
||||
* but that should be harmless for GL since 64b is not
|
||||
* supported there.
|
||||
*/
|
||||
.lower_int64_options = (nir_lower_int64_options)~0,
|
||||
.lower_uniforms_to_ubo = true,
|
||||
.use_scoped_barrier = true,
|
||||
};
|
||||
|
||||
/* Options for newer GPUs: we don't want to lower vertex_id to _zero_based
 * there, so vertex_id_zero_based is false. Relative to `options`, this
 * table also sets vectorize_io, max_unroll_iterations = 32,
 * force_indirect_unrolling = nir_var_all and lower_device_index_to_zero.
 * ir3_compiler_create() copies it into compiler->nir_options when
 * compiler->gen >= 6.
 */
|
||||
static const nir_shader_compiler_options options_a6xx = {
|
||||
.lower_fpow = true,
|
||||
.lower_scmp = true,
|
||||
.lower_flrp16 = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_ffract = true,
|
||||
.lower_fmod = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_isign = true,
|
||||
.lower_ldexp = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_usub_borrow = true,
|
||||
.lower_mul_high = true,
|
||||
.lower_mul_2x32_64 = true,
|
||||
.fuse_ffma16 = true,
|
||||
.fuse_ffma32 = true,
|
||||
.fuse_ffma64 = true,
|
||||
.vertex_id_zero_based = false,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_unpack_snorm_2x16 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_unpack_unorm_2x16 = true,
|
||||
.lower_pack_split = true,
|
||||
.use_interpolated_input_intrinsics = true,
|
||||
.lower_rotate = true,
|
||||
.vectorize_io = true,
|
||||
.lower_to_scalar = true,
|
||||
.has_imul24 = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.max_unroll_iterations = 32,
|
||||
.force_indirect_unrolling = nir_var_all,
|
||||
.lower_wpos_pntc = true,
|
||||
.lower_cs_local_index_from_id = true,
|
||||
|
||||
/* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
|
||||
* but that should be harmless for GL since 64b is not
|
||||
* supported there.
|
||||
*/
|
||||
.lower_int64_options = (nir_lower_int64_options)~0,
|
||||
.lower_uniforms_to_ubo = true,
|
||||
.lower_device_index_to_zero = true,
|
||||
.use_scoped_barrier = true,
|
||||
};
|
||||
|
||||
struct ir3_compiler *
|
||||
ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
bool robust_ubo_access)
|
||||
|
@ -192,7 +307,19 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
|||
|
||||
compiler->bool_type = (compiler->gen >= 5) ? TYPE_U16 : TYPE_U32;
|
||||
|
||||
if (compiler->gen >= 6) {
|
||||
compiler->nir_options = options_a6xx;
|
||||
} else {
|
||||
compiler->nir_options = options;
|
||||
}
|
||||
|
||||
ir3_disk_cache_init(compiler);
|
||||
|
||||
return compiler;
|
||||
}
|
||||
|
||||
/* Return the NIR compiler options held by `compiler`.
 *
 * The result is the address of compiler->nir_options, i.e. it points into
 * the ir3_compiler object itself rather than at a shared static table.
 */
const nir_shader_compiler_options *
|
||||
ir3_get_compiler_options(struct ir3_compiler *compiler)
|
||||
{
|
||||
return &compiler->nir_options;
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#ifndef IR3_COMPILER_H_
|
||||
#define IR3_COMPILER_H_
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/log.h"
|
||||
|
||||
|
@ -45,6 +46,8 @@ struct ir3_compiler {
|
|||
|
||||
struct disk_cache *disk_cache;
|
||||
|
||||
struct nir_shader_compiler_options nir_options;
|
||||
|
||||
/* If true, UBO accesses are assumed to be bounds-checked as defined by
|
||||
* VK_EXT_robustness2 and optimizations may have to be more conservative.
|
||||
*/
|
||||
|
@ -186,6 +189,9 @@ bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
|
|||
void ir3_disk_cache_store(struct ir3_compiler *compiler,
|
||||
struct ir3_shader_variant *v);
|
||||
|
||||
const nir_shader_compiler_options *
|
||||
ir3_get_compiler_options(struct ir3_compiler *compiler);
|
||||
|
||||
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||
struct ir3_shader_variant *so);
|
||||
|
||||
|
|
|
@ -31,129 +31,6 @@
|
|||
#include "ir3_nir.h"
|
||||
#include "ir3_shader.h"
|
||||
|
||||
/* Baseline NIR compiler options: the table ir3_get_compiler_options()
 * falls back to (`return &options;`) when compiler->gen is below 6.
 * Unlike options_a6xx, it sets vertex_id_zero_based = true.
 */
static const nir_shader_compiler_options options = {
|
||||
.lower_fpow = true,
|
||||
.lower_scmp = true,
|
||||
.lower_flrp16 = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_ffract = true,
|
||||
.lower_fmod = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_isign = true,
|
||||
.lower_ldexp = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_usub_borrow = true,
|
||||
.lower_mul_high = true,
|
||||
.lower_mul_2x32_64 = true,
|
||||
.fuse_ffma16 = true,
|
||||
.fuse_ffma32 = true,
|
||||
.fuse_ffma64 = true,
|
||||
.vertex_id_zero_based = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_unpack_snorm_2x16 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_unpack_unorm_2x16 = true,
|
||||
.lower_pack_split = true,
|
||||
.use_interpolated_input_intrinsics = true,
|
||||
.lower_rotate = true,
|
||||
.lower_to_scalar = true,
|
||||
.has_imul24 = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.lower_wpos_pntc = true,
|
||||
.lower_cs_local_index_from_id = true,
|
||||
|
||||
/* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
|
||||
* but that should be harmless for GL since 64b is not
|
||||
* supported there.
|
||||
*/
|
||||
.lower_int64_options = (nir_lower_int64_options)~0,
|
||||
.lower_uniforms_to_ubo = true,
|
||||
.use_scoped_barrier = true,
|
||||
};
|
||||
|
||||
/* Options for newer GPUs: we don't want to lower vertex_id to _zero_based
 * there, so vertex_id_zero_based is false. Relative to `options`, this
 * table also sets vectorize_io, max_unroll_iterations = 32,
 * force_indirect_unrolling = nir_var_all and lower_device_index_to_zero.
 * ir3_get_compiler_options() returns it when compiler->gen >= 6.
 */
|
||||
static const nir_shader_compiler_options options_a6xx = {
|
||||
.lower_fpow = true,
|
||||
.lower_scmp = true,
|
||||
.lower_flrp16 = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_ffract = true,
|
||||
.lower_fmod = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_isign = true,
|
||||
.lower_ldexp = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_usub_borrow = true,
|
||||
.lower_mul_high = true,
|
||||
.lower_mul_2x32_64 = true,
|
||||
.fuse_ffma16 = true,
|
||||
.fuse_ffma32 = true,
|
||||
.fuse_ffma64 = true,
|
||||
.vertex_id_zero_based = false,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_unpack_snorm_2x16 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_unpack_unorm_2x16 = true,
|
||||
.lower_pack_split = true,
|
||||
.use_interpolated_input_intrinsics = true,
|
||||
.lower_rotate = true,
|
||||
.vectorize_io = true,
|
||||
.lower_to_scalar = true,
|
||||
.has_imul24 = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.max_unroll_iterations = 32,
|
||||
.force_indirect_unrolling = nir_var_all,
|
||||
.lower_wpos_pntc = true,
|
||||
.lower_cs_local_index_from_id = true,
|
||||
|
||||
/* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
|
||||
* but that should be harmless for GL since 64b is not
|
||||
* supported there.
|
||||
*/
|
||||
.lower_int64_options = (nir_lower_int64_options)~0,
|
||||
.lower_uniforms_to_ubo = true,
|
||||
.lower_device_index_to_zero = true,
|
||||
.use_scoped_barrier = true,
|
||||
};
|
||||
|
||||
/* Pick the static NIR option table for `compiler` by GPU generation:
 * options_a6xx when compiler->gen >= 6, otherwise options.
 */
const nir_shader_compiler_options *
|
||||
ir3_get_compiler_options(struct ir3_compiler *compiler)
|
||||
{
|
||||
if (compiler->gen >= 6)
|
||||
return &options_a6xx;
|
||||
return &options;
|
||||
}
|
||||
|
||||
static bool
|
||||
ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
|
||||
unsigned bit_size, unsigned num_components,
|
||||
|
|
|
@ -62,8 +62,6 @@ bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
|
|||
bool ir3_nir_lower_64b_undef(nir_shader *shader);
|
||||
bool ir3_nir_lower_64b_global(nir_shader *shader);
|
||||
|
||||
const nir_shader_compiler_options *
|
||||
ir3_get_compiler_options(struct ir3_compiler *compiler);
|
||||
void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
|
||||
void ir3_nir_lower_io_to_temporaries(nir_shader *s);
|
||||
void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);
|
||||
|
|
Loading…
Reference in New Issue