2015-03-31 22:03:39 +01:00
|
|
|
/*
|
|
|
|
* Copyright © 2015 Red Hat
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Rob Clark <robclark@freedesktop.org>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "nir.h"
|
|
|
|
#include "nir_builder.h"
|
|
|
|
|
2019-02-05 15:56:24 +00:00
|
|
|
/* Has two paths
|
|
|
|
* One (nir_lower_idiv_fast) lowers idiv/udiv/umod and is based on
|
|
|
|
* NV50LegalizeSSA::handleDIV()
|
2015-03-31 22:03:39 +01:00
|
|
|
*
|
2019-02-05 15:56:24 +00:00
|
|
|
 * Note that this path probably does not have enough precision for
|
|
|
|
* compute shaders. Perhaps we want a second higher precision (looping)
|
|
|
|
* version of this? Or perhaps we assume if you can do compute shaders you
|
|
|
|
* can also branch out to a pre-optimized shader library routine..
|
|
|
|
*
|
|
|
|
* The other path (nir_lower_idiv_precise) is based off of code used by LLVM's
|
|
|
|
* AMDGPU target. It should handle 32-bit idiv/irem/imod/udiv/umod exactly.
|
2015-03-31 22:03:39 +01:00
|
|
|
*/
|
|
|
|
|
2016-05-09 17:36:03 +01:00
|
|
|
static bool
|
2015-03-31 22:03:39 +01:00
|
|
|
convert_instr(nir_builder *bld, nir_alu_instr *alu)
|
|
|
|
{
|
2019-07-23 00:30:56 +01:00
|
|
|
nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r, *rt;
|
2015-03-31 22:03:39 +01:00
|
|
|
nir_op op = alu->op;
|
|
|
|
bool is_signed;
|
|
|
|
|
|
|
|
if ((op != nir_op_idiv) &&
|
|
|
|
(op != nir_op_udiv) &&
|
2019-04-16 07:06:23 +01:00
|
|
|
(op != nir_op_imod) &&
|
|
|
|
(op != nir_op_umod) &&
|
|
|
|
(op != nir_op_irem))
|
2016-05-09 17:36:03 +01:00
|
|
|
return false;
|
2015-03-31 22:03:39 +01:00
|
|
|
|
2019-04-16 07:06:23 +01:00
|
|
|
is_signed = (op == nir_op_idiv ||
|
|
|
|
op == nir_op_imod ||
|
|
|
|
op == nir_op_irem);
|
2015-03-31 22:03:39 +01:00
|
|
|
|
2015-08-06 15:16:07 +01:00
|
|
|
bld->cursor = nir_before_instr(&alu->instr);
|
2015-03-31 22:03:39 +01:00
|
|
|
|
nir: add nir_ssa_for_alu_src()
Using something like:
numer = nir_ssa_for_src(bld, alu->src[0].src,
nir_ssa_alu_instr_src_components(alu, 0));
for alu src's with swizzle, like:
vec1 ssa_10 = intrinsic load_uniform () () (0, 0)
vec2 ssa_11 = intrinsic load_uniform () () (1, 0)
vec2 ssa_2 = udiv ssa_10.xx, ssa_11
ends up turning into something like:
vec1 ssa_10 = intrinsic load_uniform () () (0, 0)
vec2 ssa_11 = intrinsic load_uniform () () (1, 0)
vec2 ssa_13 = imov ssa_10
...
because nir_ssa_for_src() ignore's the original nir_alu_src's swizzle.
Instead for alu instructions, nir_src_for_alu_src() should be used to
ensure the original alu src's swizzle doesn't get lost in translation:
vec1 ssa_10 = intrinsic load_uniform () () (0, 0)
vec2 ssa_11 = intrinsic load_uniform () () (1, 0)
vec2 ssa_13 = imov ssa_10.xx
...
v2: check for abs/neg, and re-use existing nir_alu_src
Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
2015-11-05 15:23:48 +00:00
|
|
|
numer = nir_ssa_for_alu_src(bld, alu, 0);
|
|
|
|
denom = nir_ssa_for_alu_src(bld, alu, 1);
|
2015-03-31 22:03:39 +01:00
|
|
|
|
|
|
|
if (is_signed) {
|
2017-03-08 03:54:37 +00:00
|
|
|
af = nir_i2f32(bld, numer);
|
|
|
|
bf = nir_i2f32(bld, denom);
|
2015-03-31 22:03:39 +01:00
|
|
|
af = nir_fabs(bld, af);
|
|
|
|
bf = nir_fabs(bld, bf);
|
|
|
|
a = nir_iabs(bld, numer);
|
|
|
|
b = nir_iabs(bld, denom);
|
|
|
|
} else {
|
2017-03-08 03:54:37 +00:00
|
|
|
af = nir_u2f32(bld, numer);
|
|
|
|
bf = nir_u2f32(bld, denom);
|
2015-03-31 22:03:39 +01:00
|
|
|
a = numer;
|
|
|
|
b = denom;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get first result: */
|
|
|
|
bf = nir_frcp(bld, bf);
|
|
|
|
bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */
|
|
|
|
q = nir_fmul(bld, af, bf);
|
|
|
|
|
|
|
|
if (is_signed) {
|
2017-03-08 03:54:37 +00:00
|
|
|
q = nir_f2i32(bld, q);
|
2015-03-31 22:03:39 +01:00
|
|
|
} else {
|
2017-03-08 03:54:37 +00:00
|
|
|
q = nir_f2u32(bld, q);
|
2015-03-31 22:03:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* get error of first result: */
|
|
|
|
r = nir_imul(bld, q, b);
|
|
|
|
r = nir_isub(bld, a, r);
|
2017-03-08 03:54:37 +00:00
|
|
|
r = nir_u2f32(bld, r);
|
2015-03-31 22:03:39 +01:00
|
|
|
r = nir_fmul(bld, r, bf);
|
2017-03-08 03:54:37 +00:00
|
|
|
r = nir_f2u32(bld, r);
|
2015-03-31 22:03:39 +01:00
|
|
|
|
|
|
|
/* add quotients: */
|
|
|
|
q = nir_iadd(bld, q, r);
|
|
|
|
|
|
|
|
/* correction: if modulus >= divisor, add 1 */
|
|
|
|
r = nir_imul(bld, q, b);
|
|
|
|
r = nir_isub(bld, a, r);
|
2019-07-23 00:30:56 +01:00
|
|
|
rt = nir_uge(bld, r, b);
|
2015-03-31 22:03:39 +01:00
|
|
|
|
2019-07-23 00:30:56 +01:00
|
|
|
if (op == nir_op_umod) {
|
|
|
|
q = nir_bcsel(bld, rt, nir_isub(bld, r, b), r);
|
|
|
|
} else {
|
|
|
|
r = nir_b2i32(bld, rt);
|
|
|
|
|
|
|
|
q = nir_iadd(bld, q, r);
|
|
|
|
if (is_signed) {
|
|
|
|
/* fix the sign: */
|
|
|
|
r = nir_ixor(bld, numer, denom);
|
|
|
|
r = nir_ilt(bld, r, nir_imm_int(bld, 0));
|
|
|
|
b = nir_ineg(bld, q);
|
|
|
|
q = nir_bcsel(bld, r, b, q);
|
|
|
|
|
|
|
|
if (op == nir_op_imod || op == nir_op_irem) {
|
|
|
|
q = nir_imul(bld, q, denom);
|
|
|
|
q = nir_isub(bld, numer, q);
|
|
|
|
if (op == nir_op_imod) {
|
2020-08-15 06:11:27 +01:00
|
|
|
q = nir_bcsel(bld, nir_ieq_imm(bld, q, 0),
|
2019-07-23 00:30:56 +01:00
|
|
|
nir_imm_int(bld, 0),
|
|
|
|
nir_bcsel(bld, r, nir_iadd(bld, q, denom), q));
|
|
|
|
}
|
2019-04-16 07:06:23 +01:00
|
|
|
}
|
|
|
|
}
|
2015-03-31 22:03:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
assert(alu->dest.dest.is_ssa);
|
2021-03-03 06:13:38 +00:00
|
|
|
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, q);
|
2016-05-09 17:36:03 +01:00
|
|
|
|
|
|
|
return true;
|
2015-03-31 22:03:39 +01:00
|
|
|
}
|
|
|
|
|
2019-02-05 15:56:24 +00:00
|
|
|
/* ported from LLVM's AMDGPUTargetLowering::LowerUDIVREM */
|
|
|
|
static nir_ssa_def *
|
|
|
|
emit_udiv(nir_builder *bld, nir_ssa_def *numer, nir_ssa_def *denom, bool modulo)
|
|
|
|
{
|
|
|
|
nir_ssa_def *rcp = nir_frcp(bld, nir_u2f32(bld, denom));
|
2020-08-11 15:25:37 +01:00
|
|
|
rcp = nir_f2u32(bld, nir_fmul_imm(bld, rcp, 4294966784.0));
|
|
|
|
|
|
|
|
nir_ssa_def *neg_rcp_times_denom =
|
|
|
|
nir_imul(bld, rcp, nir_ineg(bld, denom));
|
|
|
|
rcp = nir_iadd(bld, rcp, nir_umul_high(bld, rcp, neg_rcp_times_denom));
|
|
|
|
|
|
|
|
/* Get initial estimate for quotient/remainder, then refine the estimate
|
|
|
|
* in two iterations after */
|
|
|
|
nir_ssa_def *quotient = nir_umul_high(bld, numer, rcp);
|
2019-02-05 15:56:24 +00:00
|
|
|
nir_ssa_def *num_s_remainder = nir_imul(bld, quotient, denom);
|
|
|
|
nir_ssa_def *remainder = nir_isub(bld, numer, num_s_remainder);
|
2020-08-11 15:25:37 +01:00
|
|
|
|
|
|
|
/* First refinement step */
|
2019-02-05 15:56:24 +00:00
|
|
|
nir_ssa_def *remainder_ge_den = nir_uge(bld, remainder, denom);
|
2020-08-11 15:25:37 +01:00
|
|
|
if (!modulo) {
|
|
|
|
quotient = nir_bcsel(bld, remainder_ge_den,
|
|
|
|
nir_iadd_imm(bld, quotient, 1), quotient);
|
|
|
|
}
|
|
|
|
remainder = nir_bcsel(bld, remainder_ge_den,
|
|
|
|
nir_isub(bld, remainder, denom), remainder);
|
2019-02-05 15:56:24 +00:00
|
|
|
|
2020-08-11 15:25:37 +01:00
|
|
|
/* Second refinement step */
|
|
|
|
remainder_ge_den = nir_uge(bld, remainder, denom);
|
2019-02-05 15:56:24 +00:00
|
|
|
if (modulo) {
|
2020-08-11 15:25:37 +01:00
|
|
|
return nir_bcsel(bld, remainder_ge_den, nir_isub(bld, remainder, denom),
|
|
|
|
remainder);
|
2019-02-05 15:56:24 +00:00
|
|
|
} else {
|
2020-08-11 15:25:37 +01:00
|
|
|
return nir_bcsel(bld, remainder_ge_den, nir_iadd_imm(bld, quotient, 1),
|
|
|
|
quotient);
|
2019-02-05 15:56:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ported from LLVM's AMDGPUTargetLowering::LowerSDIVREM */
|
|
|
|
static nir_ssa_def *
|
|
|
|
emit_idiv(nir_builder *bld, nir_ssa_def *numer, nir_ssa_def *denom, nir_op op)
|
|
|
|
{
|
|
|
|
nir_ssa_def *lh_sign = nir_ilt(bld, numer, nir_imm_int(bld, 0));
|
|
|
|
nir_ssa_def *rh_sign = nir_ilt(bld, denom, nir_imm_int(bld, 0));
|
|
|
|
lh_sign = nir_bcsel(bld, lh_sign, nir_imm_int(bld, -1), nir_imm_int(bld, 0));
|
|
|
|
rh_sign = nir_bcsel(bld, rh_sign, nir_imm_int(bld, -1), nir_imm_int(bld, 0));
|
|
|
|
|
|
|
|
nir_ssa_def *lhs = nir_iadd(bld, numer, lh_sign);
|
|
|
|
nir_ssa_def *rhs = nir_iadd(bld, denom, rh_sign);
|
|
|
|
lhs = nir_ixor(bld, lhs, lh_sign);
|
|
|
|
rhs = nir_ixor(bld, rhs, rh_sign);
|
|
|
|
|
|
|
|
if (op == nir_op_idiv) {
|
|
|
|
nir_ssa_def *d_sign = nir_ixor(bld, lh_sign, rh_sign);
|
|
|
|
nir_ssa_def *res = emit_udiv(bld, lhs, rhs, false);
|
|
|
|
res = nir_ixor(bld, res, d_sign);
|
|
|
|
return nir_isub(bld, res, d_sign);
|
|
|
|
} else {
|
|
|
|
nir_ssa_def *res = emit_udiv(bld, lhs, rhs, true);
|
|
|
|
res = nir_ixor(bld, res, lh_sign);
|
|
|
|
res = nir_isub(bld, res, lh_sign);
|
|
|
|
if (op == nir_op_imod) {
|
2020-08-15 06:11:27 +01:00
|
|
|
nir_ssa_def *cond = nir_ieq_imm(bld, res, 0);
|
2019-02-05 15:56:24 +00:00
|
|
|
cond = nir_ior(bld, nir_ieq(bld, lh_sign, rh_sign), cond);
|
|
|
|
res = nir_bcsel(bld, cond, res, nir_iadd(bld, res, denom));
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
convert_instr_precise(nir_builder *bld, nir_alu_instr *alu)
|
|
|
|
{
|
|
|
|
nir_op op = alu->op;
|
|
|
|
|
|
|
|
if ((op != nir_op_idiv) &&
|
|
|
|
(op != nir_op_imod) &&
|
|
|
|
(op != nir_op_irem) &&
|
|
|
|
(op != nir_op_udiv) &&
|
|
|
|
(op != nir_op_umod))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (alu->dest.dest.ssa.bit_size != 32)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
bld->cursor = nir_before_instr(&alu->instr);
|
|
|
|
|
|
|
|
nir_ssa_def *numer = nir_ssa_for_alu_src(bld, alu, 0);
|
|
|
|
nir_ssa_def *denom = nir_ssa_for_alu_src(bld, alu, 1);
|
|
|
|
|
|
|
|
nir_ssa_def *res = NULL;
|
|
|
|
|
|
|
|
if (op == nir_op_udiv || op == nir_op_umod)
|
|
|
|
res = emit_udiv(bld, numer, denom, op == nir_op_umod);
|
|
|
|
else
|
|
|
|
res = emit_idiv(bld, numer, denom, op);
|
|
|
|
|
|
|
|
assert(alu->dest.dest.is_ssa);
|
2021-03-03 06:13:38 +00:00
|
|
|
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, res);
|
2019-02-05 15:56:24 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-05-09 17:36:03 +01:00
|
|
|
static bool
|
2019-02-05 15:56:24 +00:00
|
|
|
convert_impl(nir_function_impl *impl, enum nir_lower_idiv_path path)
|
2015-03-31 22:03:39 +01:00
|
|
|
{
|
|
|
|
nir_builder b;
|
|
|
|
nir_builder_init(&b, impl);
|
2016-05-09 17:36:03 +01:00
|
|
|
bool progress = false;
|
2015-03-31 22:03:39 +01:00
|
|
|
|
2016-04-08 21:32:58 +01:00
|
|
|
nir_foreach_block(block, impl) {
|
2016-04-27 02:34:19 +01:00
|
|
|
nir_foreach_instr_safe(instr, block) {
|
2019-02-05 15:56:24 +00:00
|
|
|
if (instr->type == nir_instr_type_alu && path == nir_lower_idiv_precise)
|
|
|
|
progress |= convert_instr_precise(&b, nir_instr_as_alu(instr));
|
|
|
|
else if (instr->type == nir_instr_type_alu)
|
2016-05-09 17:36:03 +01:00
|
|
|
progress |= convert_instr(&b, nir_instr_as_alu(instr));
|
2016-04-08 21:32:58 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-31 22:03:39 +01:00
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index |
|
|
|
|
nir_metadata_dominance);
|
2016-05-09 17:36:03 +01:00
|
|
|
|
|
|
|
return progress;
|
2015-03-31 22:03:39 +01:00
|
|
|
}
|
|
|
|
|
2016-05-09 17:36:03 +01:00
|
|
|
bool
|
2019-02-05 15:56:24 +00:00
|
|
|
nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path)
|
2015-03-31 22:03:39 +01:00
|
|
|
{
|
2016-05-09 17:36:03 +01:00
|
|
|
bool progress = false;
|
|
|
|
|
2016-04-27 04:26:42 +01:00
|
|
|
nir_foreach_function(function, shader) {
|
2015-12-26 18:00:47 +00:00
|
|
|
if (function->impl)
|
2019-02-05 15:56:24 +00:00
|
|
|
progress |= convert_impl(function->impl, path);
|
2015-03-31 22:03:39 +01:00
|
|
|
}
|
2016-05-09 17:36:03 +01:00
|
|
|
|
|
|
|
return progress;
|
2015-03-31 22:03:39 +01:00
|
|
|
}
|