From 1d8994a63b546a5b2dc4feb5bd98a84ee853d6af Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Wed, 27 Feb 2019 14:02:54 -0800 Subject: [PATCH] glsl: [u/i]mulExtended optimization for GLSL Optimize mulExtended to use 32x32->64 multiplication. Drivers that are not based on NIR can set the MUL64_TO_MUL_AND_MUL_HIGH lowering flag to keep the previous behavior (separate mul and imul_high operations). v2: Add missing condition check (Jason Ekstrand) Signed-off-by: Sagar Ghuge Suggested-by: Matt Turner Suggested-by: Jason Ekstrand Reviewed-by: Jason Ekstrand --- src/compiler/glsl/builtin_functions.cpp | 32 +++++++++- src/compiler/glsl/glsl_to_nir.cpp | 14 ++++- src/compiler/glsl/ir_optimization.h | 1 + src/compiler/glsl/ir_validate.cpp | 11 ++++ src/compiler/glsl/lower_instructions.cpp | 70 ++++++++++++++++++++++ src/mesa/program/ir_to_mesa.cpp | 1 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 + 7 files changed, 126 insertions(+), 4 deletions(-) diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index aba1a14bd90..386cbc0ccd7 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -5866,14 +5866,42 @@ builtin_builder::_usubBorrow(const glsl_type *type) ir_function_signature * builtin_builder::_mulExtended(const glsl_type *type) { + const glsl_type *mul_type, *unpack_type; + ir_expression_operation unpack_op; + + if (type->base_type == GLSL_TYPE_INT) { + unpack_op = ir_unop_unpack_int_2x32; + mul_type = glsl_type::get_instance(GLSL_TYPE_INT64, type->vector_elements, 1); + unpack_type = glsl_type::ivec2_type; + } else { + unpack_op = ir_unop_unpack_uint_2x32; + mul_type = glsl_type::get_instance(GLSL_TYPE_UINT64, type->vector_elements, 1); + unpack_type = glsl_type::uvec2_type; + } + ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); ir_variable *msb = out_var(type, "msb"); ir_variable *lsb = out_var(type, "lsb"); MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31_or_integer_functions, 4, 
x, y, msb, lsb); - body.emit(assign(msb, imul_high(x, y))); - body.emit(assign(lsb, mul(x, y))); + ir_variable *unpack_val = body.make_temp(unpack_type, "_unpack_val"); + + ir_expression *mul_res = new(mem_ctx) ir_expression(ir_binop_mul, mul_type, + new(mem_ctx)ir_dereference_variable(x), + new(mem_ctx)ir_dereference_variable(y)); + + if (type->vector_elements == 1) { + body.emit(assign(unpack_val, expr(unpack_op, mul_res))); + body.emit(assign(msb, swizzle_y(unpack_val))); + body.emit(assign(lsb, swizzle_x(unpack_val))); + } else { + for (int i = 0; i < type->vector_elements; i++) { + body.emit(assign(unpack_val, expr(unpack_op, swizzle(mul_res, i, 1)))); + body.emit(assign(array_ref(msb, i), swizzle_y(unpack_val))); + body.emit(assign(array_ref(lsb, i), swizzle_x(unpack_val))); + } + } return sig; } diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 09a4f19f6f2..f7df91d887d 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1865,8 +1865,18 @@ nir_visitor::visit(ir_expression *ir) : nir_isub(&b, srcs[0], srcs[1]); break; case ir_binop_mul: - result = type_is_float(out_type) ? 
nir_fmul(&b, srcs[0], srcs[1]) - : nir_imul(&b, srcs[0], srcs[1]); + if (type_is_float(out_type)) + result = nir_fmul(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT64 && + (ir->operands[0]->type->base_type == GLSL_TYPE_INT || + ir->operands[1]->type->base_type == GLSL_TYPE_INT)) + result = nir_imul_2x32_64(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_UINT64 && + (ir->operands[0]->type->base_type == GLSL_TYPE_UINT || + ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) + result = nir_umul_2x32_64(&b, srcs[0], srcs[1]); + else + result = nir_imul(&b, srcs[0], srcs[1]); break; case ir_binop_div: if (type_is_float(out_type)) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index ef68b93c09e..e027654d3a0 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -57,6 +57,7 @@ struct gl_shader_program; #define DDIV_TO_MUL_RCP 0x100000 #define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP) #define SQRT_TO_ABS_SQRT 0x200000 +#define MUL64_TO_MUL_AND_MUL_HIGH 0x400000 /* Opertaions for lower_64bit_integer_instructions() */ #define MUL64 (1U << 0) diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index 819e8aa60dd..18d27cbf6b1 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -621,6 +621,17 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type->base_type == ir->operands[1]->type->base_type); + if (ir->operation == ir_binop_mul && + (ir->type->base_type == GLSL_TYPE_UINT64 || + ir->type->base_type == GLSL_TYPE_INT64) && + (ir->operands[0]->type->base_type == GLSL_TYPE_INT || + ir->operands[1]->type->base_type == GLSL_TYPE_INT || + ir->operands[0]->type->base_type == GLSL_TYPE_UINT || + ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) { + assert(ir->operands[0]->type == ir->operands[1]->type); + break; + } + if (ir->operands[0]->type->is_scalar()) assert(ir->operands[1]->type 
== ir->type); else if (ir->operands[1]->type->is_scalar()) diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp index 91f71b37619..8e0c8744048 100644 --- a/src/compiler/glsl/lower_instructions.cpp +++ b/src/compiler/glsl/lower_instructions.cpp @@ -169,6 +169,7 @@ private: void find_msb_to_float_cast(ir_expression *ir); void imul_high_to_mul(ir_expression *ir); void sqrt_to_abs_sqrt(ir_expression *ir); + void mul64_to_mul_and_mul_high(ir_expression *ir); ir_expression *_carry(operand a, operand b); }; @@ -1666,6 +1667,66 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir) +{ + /* Lower 32x32-> 64 to + * msb = imul_high(x_lo, y_lo) + * lsb = mul(x_lo, y_lo) + */ + const unsigned elements = ir->operands[0]->type->vector_elements; + + const ir_expression_operation operation = + ir->type->base_type == GLSL_TYPE_UINT64 ? ir_unop_pack_uint_2x32 + : ir_unop_pack_int_2x32; + + const glsl_type *var_type = ir->type->base_type == GLSL_TYPE_UINT64 + ? glsl_type::uvec(elements) + : glsl_type::ivec(elements); + + const glsl_type *ret_type = ir->type->base_type == GLSL_TYPE_UINT64 + ? 
glsl_type::uvec2_type + : glsl_type::ivec2_type; + + ir_instruction &i = *base_ir; + + ir_variable *msb = + new(ir) ir_variable(var_type, "msb", ir_var_temporary); + ir_variable *lsb = + new(ir) ir_variable(var_type, "lsb", ir_var_temporary); + ir_variable *x = + new(ir) ir_variable(var_type, "x", ir_var_temporary); + ir_variable *y = + new(ir) ir_variable(var_type, "y", ir_var_temporary); + + i.insert_before(x); + i.insert_before(assign(x, ir->operands[0])); + i.insert_before(y); + i.insert_before(assign(y, ir->operands[1])); + i.insert_before(msb); + i.insert_before(lsb); + + i.insert_before(assign(msb, imul_high(x, y))); + i.insert_before(assign(lsb, mul(x, y))); + + ir_rvalue *result[4] = {NULL}; + for (unsigned elem = 0; elem < elements; elem++) { + ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type, + swizzle(lsb, elem, 1), + swizzle(msb, elem, 1), NULL, NULL); + result[elem] = expr(operation, val); + } + + ir->operation = ir_quadop_vector; + ir->init_num_operands(); + ir->operands[0] = result[0]; + ir->operands[1] = result[1]; + ir->operands[2] = result[2]; + ir->operands[3] = result[3]; + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -1803,6 +1864,15 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) imul_high_to_mul(ir); break; + case ir_binop_mul: + if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) && + (ir->type->base_type == GLSL_TYPE_INT64 || + ir->type->base_type == GLSL_TYPE_UINT64) && + (ir->operands[0]->type->base_type == GLSL_TYPE_INT || + ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) + mul64_to_mul_and_mul_high(ir); + break; + case ir_unop_rsq: case ir_unop_sqrt: if (lowering(SQRT_TO_ABS_SQRT)) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index e65a6743353..ed194eb13a3 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -3053,6 +3053,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog) do_mat_op_to_vec(ir); lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP + | MUL64_TO_MUL_AND_MUL_HIGH | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_common_optimization(ir, true, true, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 484a5329455..264557c9f58 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7379,6 +7379,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) FDIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | + MUL64_TO_MUL_AND_MUL_HIGH | (have_ldexp ? 0 : LDEXP_TO_ARITH) | (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) | CARRY_TO_ARITH |