glsl,nir: Move i/umulExtended lowering to NIR.

NIR already has the necessary lowering, and the GLSL lowering violates GLSL IR validation rules. Once quadop lowering was turned off, the IR validation at the end of the compile path on DEBUG builds caught the problem. In order to move the lowering to NIR, though, we need to make sure that drivers supporting these functions actually have the lowering flag set. xfails added for t860, where apparently this tickles a variety of existing 64-bit bugs in the backend. Fixes: #6461 Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Reviewed-by: Mykhailo Skorokhodov <mykhailo.skorokhodov@globallogic.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16437>
2022-05-10 10:31:07 -07:00 · 2022-05-10 10:31:07 -07:00 · 7472bb4bad
parent 6f0db3778f
commit 7472bb4bad
12 changed files with 15 additions and 73 deletions
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@ -55,7 +55,6 @@ struct gl_shader_program;
 #define DDIV_TO_MUL_RCP           0x100000
 #define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 #define SQRT_TO_ABS_SQRT          0x200000
-#define MUL64_TO_MUL_AND_MUL_HIGH 0x400000

 /* Operations for lower_64bit_integer_instructions() */
 #define DIV64                     (1U << 0)
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@ -157,7 +157,6 @@ private:
   void find_msb_to_float_cast(ir_expression *ir);
   void imul_high_to_mul(ir_expression *ir);
   void sqrt_to_abs_sqrt(ir_expression *ir);
-   void mul64_to_mul_and_mul_high(ir_expression *ir);

   ir_expression *_carry(operand a, operand b);

@ -1615,66 +1614,6 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir)
   this->progress = true;
 }

-void
-lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir)
-{
-   /* Lower 32x32-> 64 to
-    *    msb = imul_high(x_lo, y_lo)
-    *    lsb = mul(x_lo, y_lo)
-    */
-   const unsigned elements = ir->operands[0]->type->vector_elements;
-
-   const ir_expression_operation operation =
-      ir->type->base_type == GLSL_TYPE_UINT64 ? ir_unop_pack_uint_2x32
-                                              : ir_unop_pack_int_2x32;
-
-   const glsl_type *var_type = ir->type->base_type == GLSL_TYPE_UINT64
-                               ? glsl_type::uvec(elements)
-                               : glsl_type::ivec(elements);
-
-   const glsl_type *ret_type = ir->type->base_type == GLSL_TYPE_UINT64
-                               ? glsl_type::uvec2_type
-                               : glsl_type::ivec2_type;
-
-   ir_instruction &i = *base_ir;
-
-   ir_variable *msb =
-      new(ir) ir_variable(var_type, "msb", ir_var_temporary);
-   ir_variable *lsb =
-      new(ir) ir_variable(var_type, "lsb", ir_var_temporary);
-   ir_variable *x =
-      new(ir) ir_variable(var_type, "x", ir_var_temporary);
-   ir_variable *y =
-      new(ir) ir_variable(var_type, "y", ir_var_temporary);
-
-   i.insert_before(x);
-   i.insert_before(assign(x, ir->operands[0]));
-   i.insert_before(y);
-   i.insert_before(assign(y, ir->operands[1]));
-   i.insert_before(msb);
-   i.insert_before(lsb);
-
-   i.insert_before(assign(msb, imul_high(x, y)));
-   i.insert_before(assign(lsb, mul(x, y)));
-
-   ir_rvalue *result[4] = {NULL};
-   for (unsigned elem = 0; elem < elements; elem++) {
-      ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type,
-                                             swizzle(lsb, elem, 1),
-                                             swizzle(msb, elem, 1), NULL, NULL);
-      result[elem] = expr(operation, val);
-   }
-
-   ir->operation = ir_quadop_vector;
-   ir->init_num_operands();
-   ir->operands[0] = result[0];
-   ir->operands[1] = result[1];
-   ir->operands[2] = result[2];
-   ir->operands[3] = result[3];
-
-   this->progress = true;
-}
-
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@ -1802,15 +1741,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
         imul_high_to_mul(ir);
      break;

-   case ir_binop_mul:
-      if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) &&
-          (ir->type->base_type == GLSL_TYPE_INT64 ||
-           ir->type->base_type == GLSL_TYPE_UINT64) &&
-          (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
-           ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
-         mul64_to_mul_and_mul_high(ir);
-      break;
-
   case ir_unop_rsq:
   case ir_unop_sqrt:
      if (lowering(SQRT_TO_ABS_SQRT))
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@ -3906,6 +3906,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
   .lower_rotate = true,
   .lower_uniforms_to_ubo = true,
   .lower_vector_cmp = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
   .use_interpolated_input_intrinsics = true,
 };

--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@ -619,6 +619,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
   .lower_usub_borrow = true,
   .lower_mul_2x32_64 = true,
   .lower_ifind_msb = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
   .max_unroll_iterations = 32,
   .use_interpolated_input_intrinsics = true,
   .lower_to_scalar = true,
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@ -3393,7 +3393,7 @@ nvir_nir_shader_compiler_options(int chipset)
      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
-      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
+      nir_lower_imul_2x32_64 |
      ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
      nir_lower_ufind_msb64
   );
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@ -1081,6 +1081,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
      .lower_insert_word = true,
      .lower_rotate = true,
      .lower_to_scalar = true,
+      .lower_int64_options = nir_lower_imul_2x32_64,
      .has_sdot_4x8 = sscreen->info.has_accelerated_dot_product,
      .has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
      .has_dot_2x16 = sscreen->info.has_accelerated_dot_product,
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@ -88,6 +88,7 @@ static const nir_shader_compiler_options sp_compiler_options = {
   .lower_rotate = true,
   .lower_uniforms_to_ubo = true,
   .lower_vector_cmp = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
   .max_unroll_iterations = 32,
   .use_interpolated_input_intrinsics = true,
 };
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@ -762,6 +762,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
   .lower_extract_word = true,                                                \
   .lower_insert_byte = true,                                                 \
   .lower_insert_word = true,                                                 \
+   .lower_int64_options = nir_lower_imul_2x32_64,                             \
   .lower_fdph = true,                                                        \
   .lower_flrp64 = true,                                                      \
   .lower_rotate = true,                                                      \
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@ -729,6 +729,7 @@ static const nir_shader_compiler_options v3d_nir_options = {
        .lower_wpos_pntc = true,
        .lower_rotate = true,
        .lower_to_scalar = true,
+        .lower_int64_options = nir_lower_imul_2x32_64,
        .has_fsub = true,
        .has_isub = true,
        .divergence_analysis_options =
--- a/src/mesa/state_tracker/st_glsl_to_ir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_ir.cpp
@ -106,7 +106,6 @@ link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
                         FDIV_TO_MUL_RCP |
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2 |
-                         MUL64_TO_MUL_AND_MUL_HIGH |
                         (have_ldexp ? 0 : LDEXP_TO_ARITH) |
                         (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
                         CARRY_TO_ARITH |
--- a/src/panfrost/ci/panfrost-t860-fails.txt
+++ b/src/panfrost/ci/panfrost-t860-fails.txt
@ -87,3 +87,8 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d,Fail
 dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d_array,Fail
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d,Fail
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d_array,Fail
+
+dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_compute,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_compute,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec4_highp_vertex,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec4_highp_vertex,Fail
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@ -83,6 +83,9 @@ static const nir_shader_compiler_options midgard_nir_options = {
        .lower_unpack_unorm_4x8 = true,
        .lower_unpack_snorm_4x8 = true,
        .lower_pack_split = true,
+        .lower_pack_64_2x32_split = true,
+        .lower_unpack_64_2x32_split = true,
+        .lower_int64_options = nir_lower_imul_2x32_64,

        .lower_doubles_options = nir_lower_dmod,