glsl,nir: Move i/umulExtended lowering to NIR.

NIR already has the necessary lowering, and the GLSL lowering violates
GLSL IR validation rules.  Once quadop lowering was turned off, the IR
validation at the end of the compile path on DEBUG builds caught the
problem.

In order to move the lowering to NIR, though, we need to make sure that
drivers supporting these functions actually have the lowering flag set.

xfails added for t860, where apparently this tickles a variety of existing
64-bit bugs in the backend.

Fixes: #6461
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Mykhailo Skorokhodov <mykhailo.skorokhodov@globallogic.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16437>
This commit is contained in:
Emma Anholt 2022-05-10 10:31:07 -07:00 committed by Marge Bot
parent 6f0db3778f
commit 7472bb4bad
12 changed files with 15 additions and 73 deletions

View File

@ -55,7 +55,6 @@ struct gl_shader_program;
#define DDIV_TO_MUL_RCP 0x100000
#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
#define SQRT_TO_ABS_SQRT 0x200000
#define MUL64_TO_MUL_AND_MUL_HIGH 0x400000
/* Operations for lower_64bit_integer_instructions() */
#define DIV64 (1U << 0)

View File

@ -157,7 +157,6 @@ private:
void find_msb_to_float_cast(ir_expression *ir);
void imul_high_to_mul(ir_expression *ir);
void sqrt_to_abs_sqrt(ir_expression *ir);
void mul64_to_mul_and_mul_high(ir_expression *ir);
ir_expression *_carry(operand a, operand b);
@ -1615,66 +1614,6 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir)
this->progress = true;
}
/**
 * Lower a 32x32 -> 64 bit integer multiply into 32-bit operations.
 *
 * The two operands are copied into temporaries, the low and high 32-bit
 * halves of the product are computed with mul() and imul_high(), and each
 * component's halves are packed back into one 64-bit value with
 * pack_uint_2x32 / pack_int_2x32 (chosen from the result's base type).
 *
 * \param ir  The multiply expression; it is rewritten in place.  New
 *            temporaries and assignments are inserted before base_ir.
 */
void
lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir)
{
   /* Lower 32x32-> 64 to
    *    msb = imul_high(x_lo, y_lo)
    *    lsb = mul(x_lo, y_lo)
    */
   const unsigned elements = ir->operands[0]->type->vector_elements;
   const bool is_unsigned = ir->type->base_type == GLSL_TYPE_UINT64;

   const ir_expression_operation operation =
      is_unsigned ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;

   /* Type of the 32-bit temporaries: same width as the operands. */
   const glsl_type *var_type = is_unsigned ? glsl_type::uvec(elements)
                                           : glsl_type::ivec(elements);

   /* Type of the (lsb, msb) pair that is fed to the pack operation. */
   const glsl_type *ret_type = is_unsigned ? glsl_type::uvec2_type
                                           : glsl_type::ivec2_type;

   ir_instruction &i = *base_ir;

   ir_variable *msb =
      new(ir) ir_variable(var_type, "msb", ir_var_temporary);
   ir_variable *lsb =
      new(ir) ir_variable(var_type, "lsb", ir_var_temporary);
   ir_variable *x =
      new(ir) ir_variable(var_type, "x", ir_var_temporary);
   ir_variable *y =
      new(ir) ir_variable(var_type, "y", ir_var_temporary);

   /* Each declaration is inserted ahead of the assignment that uses it. */
   i.insert_before(x);
   i.insert_before(assign(x, ir->operands[0]));
   i.insert_before(y);
   i.insert_before(assign(y, ir->operands[1]));
   i.insert_before(msb);
   i.insert_before(lsb);

   i.insert_before(assign(msb, imul_high(x, y)));
   i.insert_before(assign(lsb, mul(x, y)));

   if (elements == 1) {
      /* Scalar result: turn the expression into the pack operation itself.
       * Rewriting a scalar-typed expression to ir_quadop_vector would leave
       * a "vector constructor" with a non-vector result type, which IR
       * validation is expected to reject.
       */
      ir_rvalue *halves = new(ir) ir_expression(ir_quadop_vector, ret_type,
                                                swizzle(lsb, 0, 1),
                                                swizzle(msb, 0, 1), NULL, NULL);

      ir->operation = operation;
      ir->init_num_operands();
      ir->operands[0] = halves;
      ir->operands[1] = NULL;
      ir->operands[2] = NULL;
      ir->operands[3] = NULL;

      this->progress = true;
      return;
   }

   /* Vector result: pack every component, then gather the packed scalars
    * with a vector constructor.  Unused slots stay NULL.
    */
   ir_rvalue *result[4] = {NULL};
   for (unsigned elem = 0; elem < elements; elem++) {
      ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type,
                                             swizzle(lsb, elem, 1),
                                             swizzle(msb, elem, 1), NULL, NULL);
      result[elem] = expr(operation, val);
   }

   ir->operation = ir_quadop_vector;
   ir->init_num_operands();
   ir->operands[0] = result[0];
   ir->operands[1] = result[1];
   ir->operands[2] = result[2];
   ir->operands[3] = result[3];

   this->progress = true;
}
ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir)
{
@ -1802,15 +1741,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
imul_high_to_mul(ir);
break;
case ir_binop_mul:
if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) &&
(ir->type->base_type == GLSL_TYPE_INT64 ||
ir->type->base_type == GLSL_TYPE_UINT64) &&
(ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
mul64_to_mul_and_mul_high(ir);
break;
case ir_unop_rsq:
case ir_unop_sqrt:
if (lowering(SQRT_TO_ABS_SQRT))

View File

@ -3906,6 +3906,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
.lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.use_interpolated_input_intrinsics = true,
};

View File

@ -619,6 +619,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_usub_borrow = true,
.lower_mul_2x32_64 = true,
.lower_ifind_msb = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
.lower_to_scalar = true,

View File

@ -3393,7 +3393,7 @@ nvir_nir_shader_compiler_options(int chipset)
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
nir_lower_imul_2x32_64 |
((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
nir_lower_ufind_msb64
);

View File

@ -1081,6 +1081,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.lower_insert_word = true,
.lower_rotate = true,
.lower_to_scalar = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.has_sdot_4x8 = sscreen->info.has_accelerated_dot_product,
.has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
.has_dot_2x16 = sscreen->info.has_accelerated_dot_product,

View File

@ -88,6 +88,7 @@ static const nir_shader_compiler_options sp_compiler_options = {
.lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
};

View File

@ -762,6 +762,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
.lower_extract_word = true, \
.lower_insert_byte = true, \
.lower_insert_word = true, \
.lower_int64_options = nir_lower_imul_2x32_64, \
.lower_fdph = true, \
.lower_flrp64 = true, \
.lower_rotate = true, \

View File

@ -729,6 +729,7 @@ static const nir_shader_compiler_options v3d_nir_options = {
.lower_wpos_pntc = true,
.lower_rotate = true,
.lower_to_scalar = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.has_fsub = true,
.has_isub = true,
.divergence_analysis_options =

View File

@ -106,7 +106,6 @@ link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
FDIV_TO_MUL_RCP |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
MUL64_TO_MUL_AND_MUL_HIGH |
(have_ldexp ? 0 : LDEXP_TO_ARITH) |
(have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
CARRY_TO_ARITH |

View File

@ -87,3 +87,8 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d,Fail
dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d_array,Fail
dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d,Fail
dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d_array,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_compute,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_compute,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec4_highp_vertex,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec4_highp_vertex,Fail

View File

@ -83,6 +83,9 @@ static const nir_shader_compiler_options midgard_nir_options = {
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_split = true,
.lower_pack_64_2x32_split = true,
.lower_unpack_64_2x32_split = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.lower_doubles_options = nir_lower_dmod,