ir3: Make nir compiler options a part of ir3_compiler

This would allow for sub-gens to have different options.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13986>
2021-11-30 18:06:53 +02:00 · 2021-11-30 18:06:53 +02:00 · e1f89a1da2
parent b8d486f298
commit e1f89a1da2
4 changed files with 133 additions and 125 deletions
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@ -69,6 +69,121 @@ ir3_compiler_destroy(struct ir3_compiler *compiler)
   ralloc_free(compiler);
 }

+static const nir_shader_compiler_options options = {
+   .lower_fpow = true,
+   .lower_scmp = true,
+   .lower_flrp16 = true,
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_ffract = true,
+   .lower_fmod = true,
+   .lower_fdiv = true,
+   .lower_isign = true,
+   .lower_ldexp = true,
+   .lower_uadd_carry = true,
+   .lower_usub_borrow = true,
+   .lower_mul_high = true,
+   .lower_mul_2x32_64 = true,
+   .fuse_ffma16 = true,
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .vertex_id_zero_based = true,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_helper_invocation = true,
+   .lower_bitfield_insert_to_shifts = true,
+   .lower_bitfield_extract_to_shifts = true,
+   .lower_pack_half_2x16 = true,
+   .lower_pack_snorm_4x8 = true,
+   .lower_pack_snorm_2x16 = true,
+   .lower_pack_unorm_4x8 = true,
+   .lower_pack_unorm_2x16 = true,
+   .lower_unpack_half_2x16 = true,
+   .lower_unpack_snorm_4x8 = true,
+   .lower_unpack_snorm_2x16 = true,
+   .lower_unpack_unorm_4x8 = true,
+   .lower_unpack_unorm_2x16 = true,
+   .lower_pack_split = true,
+   .use_interpolated_input_intrinsics = true,
+   .lower_rotate = true,
+   .lower_to_scalar = true,
+   .has_imul24 = true,
+   .has_fsub = true,
+   .has_isub = true,
+   .lower_wpos_pntc = true,
+   .lower_cs_local_index_from_id = true,
+
+   /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
+    * but that should be harmless for GL since 64b is not
+    * supported there.
+    */
+   .lower_int64_options = (nir_lower_int64_options)~0,
+   .lower_uniforms_to_ubo = true,
+   .use_scoped_barrier = true,
+};
+
+/* we don't want to lower vertex_id to _zero_based on newer gpus: */
+static const nir_shader_compiler_options options_a6xx = {
+   .lower_fpow = true,
+   .lower_scmp = true,
+   .lower_flrp16 = true,
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_ffract = true,
+   .lower_fmod = true,
+   .lower_fdiv = true,
+   .lower_isign = true,
+   .lower_ldexp = true,
+   .lower_uadd_carry = true,
+   .lower_usub_borrow = true,
+   .lower_mul_high = true,
+   .lower_mul_2x32_64 = true,
+   .fuse_ffma16 = true,
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .vertex_id_zero_based = false,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_helper_invocation = true,
+   .lower_bitfield_insert_to_shifts = true,
+   .lower_bitfield_extract_to_shifts = true,
+   .lower_pack_half_2x16 = true,
+   .lower_pack_snorm_4x8 = true,
+   .lower_pack_snorm_2x16 = true,
+   .lower_pack_unorm_4x8 = true,
+   .lower_pack_unorm_2x16 = true,
+   .lower_unpack_half_2x16 = true,
+   .lower_unpack_snorm_4x8 = true,
+   .lower_unpack_snorm_2x16 = true,
+   .lower_unpack_unorm_4x8 = true,
+   .lower_unpack_unorm_2x16 = true,
+   .lower_pack_split = true,
+   .use_interpolated_input_intrinsics = true,
+   .lower_rotate = true,
+   .vectorize_io = true,
+   .lower_to_scalar = true,
+   .has_imul24 = true,
+   .has_fsub = true,
+   .has_isub = true,
+   .max_unroll_iterations = 32,
+   .force_indirect_unrolling = nir_var_all,
+   .lower_wpos_pntc = true,
+   .lower_cs_local_index_from_id = true,
+
+   /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
+    * but that should be harmless for GL since 64b is not
+    * supported there.
+    */
+   .lower_int64_options = (nir_lower_int64_options)~0,
+   .lower_uniforms_to_ubo = true,
+   .lower_device_index_to_zero = true,
+   .use_scoped_barrier = true,
+};
+
 struct ir3_compiler *
 ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
                    bool robust_ubo_access)
@ -192,7 +307,19 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,

   compiler->bool_type = (compiler->gen >= 5) ? TYPE_U16 : TYPE_U32;

+   if (compiler->gen >= 6) {
+      compiler->nir_options = options_a6xx;
+   } else {
+      compiler->nir_options = options;
+   }
+
   ir3_disk_cache_init(compiler);

   return compiler;
 }
+
+const nir_shader_compiler_options *
+ir3_get_compiler_options(struct ir3_compiler *compiler)
+{
+   return &compiler->nir_options;
+}
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@ -27,6 +27,7 @@
 #ifndef IR3_COMPILER_H_
 #define IR3_COMPILER_H_

+#include "compiler/nir/nir.h"
 #include "util/disk_cache.h"
 #include "util/log.h"

@ -45,6 +46,8 @@ struct ir3_compiler {

   struct disk_cache *disk_cache;

+   struct nir_shader_compiler_options nir_options;
+
   /* If true, UBO accesses are assumed to be bounds-checked as defined by
    * VK_EXT_robustness2 and optimizations may have to be more conservative.
    */
@ -186,6 +189,9 @@ bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
 void ir3_disk_cache_store(struct ir3_compiler *compiler,
                          struct ir3_shader_variant *v);

+const nir_shader_compiler_options *
+ir3_get_compiler_options(struct ir3_compiler *compiler);
+
 int ir3_compile_shader_nir(struct ir3_compiler *compiler,
                           struct ir3_shader_variant *so);

--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@ -31,129 +31,6 @@
 #include "ir3_nir.h"
 #include "ir3_shader.h"

-static const nir_shader_compiler_options options = {
-   .lower_fpow = true,
-   .lower_scmp = true,
-   .lower_flrp16 = true,
-   .lower_flrp32 = true,
-   .lower_flrp64 = true,
-   .lower_ffract = true,
-   .lower_fmod = true,
-   .lower_fdiv = true,
-   .lower_isign = true,
-   .lower_ldexp = true,
-   .lower_uadd_carry = true,
-   .lower_usub_borrow = true,
-   .lower_mul_high = true,
-   .lower_mul_2x32_64 = true,
-   .fuse_ffma16 = true,
-   .fuse_ffma32 = true,
-   .fuse_ffma64 = true,
-   .vertex_id_zero_based = true,
-   .lower_extract_byte = true,
-   .lower_extract_word = true,
-   .lower_insert_byte = true,
-   .lower_insert_word = true,
-   .lower_helper_invocation = true,
-   .lower_bitfield_insert_to_shifts = true,
-   .lower_bitfield_extract_to_shifts = true,
-   .lower_pack_half_2x16 = true,
-   .lower_pack_snorm_4x8 = true,
-   .lower_pack_snorm_2x16 = true,
-   .lower_pack_unorm_4x8 = true,
-   .lower_pack_unorm_2x16 = true,
-   .lower_unpack_half_2x16 = true,
-   .lower_unpack_snorm_4x8 = true,
-   .lower_unpack_snorm_2x16 = true,
-   .lower_unpack_unorm_4x8 = true,
-   .lower_unpack_unorm_2x16 = true,
-   .lower_pack_split = true,
-   .use_interpolated_input_intrinsics = true,
-   .lower_rotate = true,
-   .lower_to_scalar = true,
-   .has_imul24 = true,
-   .has_fsub = true,
-   .has_isub = true,
-   .lower_wpos_pntc = true,
-   .lower_cs_local_index_from_id = true,
-
-   /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
-    * but that should be harmless for GL since 64b is not
-    * supported there.
-    */
-   .lower_int64_options = (nir_lower_int64_options)~0,
-   .lower_uniforms_to_ubo = true,
-   .use_scoped_barrier = true,
-};
-
-/* we don't want to lower vertex_id to _zero_based on newer gpus: */
-static const nir_shader_compiler_options options_a6xx = {
-   .lower_fpow = true,
-   .lower_scmp = true,
-   .lower_flrp16 = true,
-   .lower_flrp32 = true,
-   .lower_flrp64 = true,
-   .lower_ffract = true,
-   .lower_fmod = true,
-   .lower_fdiv = true,
-   .lower_isign = true,
-   .lower_ldexp = true,
-   .lower_uadd_carry = true,
-   .lower_usub_borrow = true,
-   .lower_mul_high = true,
-   .lower_mul_2x32_64 = true,
-   .fuse_ffma16 = true,
-   .fuse_ffma32 = true,
-   .fuse_ffma64 = true,
-   .vertex_id_zero_based = false,
-   .lower_extract_byte = true,
-   .lower_extract_word = true,
-   .lower_insert_byte = true,
-   .lower_insert_word = true,
-   .lower_helper_invocation = true,
-   .lower_bitfield_insert_to_shifts = true,
-   .lower_bitfield_extract_to_shifts = true,
-   .lower_pack_half_2x16 = true,
-   .lower_pack_snorm_4x8 = true,
-   .lower_pack_snorm_2x16 = true,
-   .lower_pack_unorm_4x8 = true,
-   .lower_pack_unorm_2x16 = true,
-   .lower_unpack_half_2x16 = true,
-   .lower_unpack_snorm_4x8 = true,
-   .lower_unpack_snorm_2x16 = true,
-   .lower_unpack_unorm_4x8 = true,
-   .lower_unpack_unorm_2x16 = true,
-   .lower_pack_split = true,
-   .use_interpolated_input_intrinsics = true,
-   .lower_rotate = true,
-   .vectorize_io = true,
-   .lower_to_scalar = true,
-   .has_imul24 = true,
-   .has_fsub = true,
-   .has_isub = true,
-   .max_unroll_iterations = 32,
-   .force_indirect_unrolling = nir_var_all,
-   .lower_wpos_pntc = true,
-   .lower_cs_local_index_from_id = true,
-
-   /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
-    * but that should be harmless for GL since 64b is not
-    * supported there.
-    */
-   .lower_int64_options = (nir_lower_int64_options)~0,
-   .lower_uniforms_to_ubo = true,
-   .lower_device_index_to_zero = true,
-   .use_scoped_barrier = true,
-};
-
-const nir_shader_compiler_options *
-ir3_get_compiler_options(struct ir3_compiler *compiler)
-{
-   if (compiler->gen >= 6)
-      return &options_a6xx;
-   return &options;
-}
-
 static bool
 ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                             unsigned bit_size, unsigned num_components,
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@ -62,8 +62,6 @@ bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
 bool ir3_nir_lower_64b_undef(nir_shader *shader);
 bool ir3_nir_lower_64b_global(nir_shader *shader);

-const nir_shader_compiler_options *
-ir3_get_compiler_options(struct ir3_compiler *compiler);
 void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
 void ir3_nir_lower_io_to_temporaries(nir_shader *s);
 void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);