nir: Make boolean conversions sized just like the others
Instead of a single i2b and b2i, we now have i2b32 and b2iN where N is one of 8, 16, 32, or 64. This leads to having a few more opcodes but now everything is consistent and booleans aren't a weird special case anymore. Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
parent
be98b1db38
commit
dca6cd9ce6
|
@ -941,16 +941,20 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||||
src[1] = ac_to_integer(&ctx->ac, src[1]);
|
src[1] = ac_to_integer(&ctx->ac, src[1]);
|
||||||
result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
|
result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
case nir_op_b2f:
|
case nir_op_b2f16:
|
||||||
|
case nir_op_b2f32:
|
||||||
|
case nir_op_b2f64:
|
||||||
result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
|
result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
|
||||||
break;
|
break;
|
||||||
case nir_op_f2b:
|
case nir_op_f2b32:
|
||||||
result = emit_f2b(&ctx->ac, src[0]);
|
result = emit_f2b(&ctx->ac, src[0]);
|
||||||
break;
|
break;
|
||||||
case nir_op_b2i:
|
case nir_op_b2i16:
|
||||||
|
case nir_op_b2i32:
|
||||||
|
case nir_op_b2i64:
|
||||||
result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
|
result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
|
||||||
break;
|
break;
|
||||||
case nir_op_i2b:
|
case nir_op_i2b32:
|
||||||
src[0] = ac_to_integer(&ctx->ac, src[0]);
|
src[0] = ac_to_integer(&ctx->ac, src[0]);
|
||||||
result = emit_i2b(&ctx->ac, src[0]);
|
result = emit_i2b(&ctx->ac, src[0]);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -682,14 +682,14 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||||
case nir_op_u2f32:
|
case nir_op_u2f32:
|
||||||
result = vir_UTOF(c, src[0]);
|
result = vir_UTOF(c, src[0]);
|
||||||
break;
|
break;
|
||||||
case nir_op_b2f:
|
case nir_op_b2f32:
|
||||||
result = vir_AND(c, src[0], vir_uniform_f(c, 1.0));
|
result = vir_AND(c, src[0], vir_uniform_f(c, 1.0));
|
||||||
break;
|
break;
|
||||||
case nir_op_b2i:
|
case nir_op_b2i32:
|
||||||
result = vir_AND(c, src[0], vir_uniform_ui(c, 1));
|
result = vir_AND(c, src[0], vir_uniform_ui(c, 1));
|
||||||
break;
|
break;
|
||||||
case nir_op_i2b:
|
case nir_op_i2b32:
|
||||||
case nir_op_f2b:
|
case nir_op_f2b32:
|
||||||
vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
|
vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
|
||||||
result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
|
result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
|
||||||
vir_uniform_ui(c, ~0),
|
vir_uniform_ui(c, ~0),
|
||||||
|
|
|
@ -1527,7 +1527,7 @@ nir_visitor::visit(ir_expression *ir)
|
||||||
result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
|
result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
|
||||||
break;
|
break;
|
||||||
case ir_unop_b2f:
|
case ir_unop_b2f:
|
||||||
result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
|
result = supports_ints ? nir_b2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
|
||||||
break;
|
break;
|
||||||
case ir_unop_f2i:
|
case ir_unop_f2i:
|
||||||
case ir_unop_f2u:
|
case ir_unop_f2u:
|
||||||
|
|
|
@ -1568,8 +1568,8 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr)
|
||||||
case nir_op_uge:
|
case nir_op_uge:
|
||||||
case nir_op_ieq:
|
case nir_op_ieq:
|
||||||
case nir_op_ine:
|
case nir_op_ine:
|
||||||
case nir_op_i2b:
|
case nir_op_i2b32:
|
||||||
case nir_op_f2b:
|
case nir_op_f2b32:
|
||||||
case nir_op_inot:
|
case nir_op_inot:
|
||||||
case nir_op_fnot:
|
case nir_op_fnot:
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -45,6 +45,10 @@ conv_opcode_types = {
|
||||||
'f2i' : 'int',
|
'f2i' : 'int',
|
||||||
'u2u' : 'uint',
|
'u2u' : 'uint',
|
||||||
'i2i' : 'int',
|
'i2i' : 'int',
|
||||||
|
'b2f' : 'float',
|
||||||
|
'b2i' : 'int',
|
||||||
|
'i2b' : 'bool',
|
||||||
|
'f2b' : 'bool',
|
||||||
}
|
}
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
|
|
|
@ -963,6 +963,18 @@ nir_load_param(nir_builder *build, uint32_t param_idx)
|
||||||
|
|
||||||
#include "nir_builder_opcodes.h"
|
#include "nir_builder_opcodes.h"
|
||||||
|
|
||||||
|
static inline nir_ssa_def *
|
||||||
|
nir_f2b(nir_builder *build, nir_ssa_def *f)
|
||||||
|
{
|
||||||
|
return nir_f2b32(build, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline nir_ssa_def *
|
||||||
|
nir_i2b(nir_builder *build, nir_ssa_def *i)
|
||||||
|
{
|
||||||
|
return nir_i2b32(build, i);
|
||||||
|
}
|
||||||
|
|
||||||
static inline nir_ssa_def *
|
static inline nir_ssa_def *
|
||||||
nir_load_barycentric(nir_builder *build, nir_intrinsic_op op,
|
nir_load_barycentric(nir_builder *build, nir_intrinsic_op op,
|
||||||
unsigned interp_mode)
|
unsigned interp_mode)
|
||||||
|
|
|
@ -95,7 +95,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
|
||||||
r = nir_isub(bld, a, r);
|
r = nir_isub(bld, a, r);
|
||||||
|
|
||||||
r = nir_uge(bld, r, b);
|
r = nir_uge(bld, r, b);
|
||||||
r = nir_b2i(bld, r);
|
r = nir_b2i32(bld, r);
|
||||||
|
|
||||||
q = nir_iadd(bld, q, r);
|
q = nir_iadd(bld, q, r);
|
||||||
if (is_signed) {
|
if (is_signed) {
|
||||||
|
|
|
@ -48,7 +48,7 @@ lower_isign64(nir_builder *b, nir_ssa_def *x)
|
||||||
|
|
||||||
nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
|
nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
|
||||||
nir_ssa_def *res_hi = nir_ishr(b, x_hi, nir_imm_int(b, 31));
|
nir_ssa_def *res_hi = nir_ishr(b, x_hi, nir_imm_int(b, 31));
|
||||||
nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i(b, is_non_zero));
|
nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i32(b, is_non_zero));
|
||||||
|
|
||||||
return nir_pack_64_2x32_split(b, res_lo, res_hi);
|
return nir_pack_64_2x32_split(b, res_lo, res_hi);
|
||||||
}
|
}
|
||||||
|
|
|
@ -90,6 +90,7 @@ class Opcode(object):
|
||||||
# helper variables for strings
|
# helper variables for strings
|
||||||
tfloat = "float"
|
tfloat = "float"
|
||||||
tint = "int"
|
tint = "int"
|
||||||
|
tbool = "bool"
|
||||||
tbool32 = "bool32"
|
tbool32 = "bool32"
|
||||||
tuint = "uint"
|
tuint = "uint"
|
||||||
tuint16 = "uint16"
|
tuint16 = "uint16"
|
||||||
|
@ -117,6 +118,8 @@ def type_size(type_):
|
||||||
def type_sizes(type_):
|
def type_sizes(type_):
|
||||||
if type_has_size(type_):
|
if type_has_size(type_):
|
||||||
return [type_size(type_)]
|
return [type_size(type_)]
|
||||||
|
elif type_ == 'bool':
|
||||||
|
return [32]
|
||||||
elif type_ == 'float':
|
elif type_ == 'float':
|
||||||
return [16, 32, 64]
|
return [16, 32, 64]
|
||||||
else:
|
else:
|
||||||
|
@ -196,11 +199,15 @@ unop("fexp2", tfloat, "exp2f(src0)")
|
||||||
unop("flog2", tfloat, "log2f(src0)")
|
unop("flog2", tfloat, "log2f(src0)")
|
||||||
|
|
||||||
# Generate all of the numeric conversion opcodes
|
# Generate all of the numeric conversion opcodes
|
||||||
for src_t in [tint, tuint, tfloat]:
|
for src_t in [tint, tuint, tfloat, tbool]:
|
||||||
if src_t in (tint, tuint):
|
if src_t == tbool:
|
||||||
dst_types = [tfloat, src_t]
|
dst_types = [tfloat, tint]
|
||||||
|
elif src_t == tint:
|
||||||
|
dst_types = [tfloat, tint, tbool]
|
||||||
|
elif src_t == tuint:
|
||||||
|
dst_types = [tfloat, tuint]
|
||||||
elif src_t == tfloat:
|
elif src_t == tfloat:
|
||||||
dst_types = [tint, tuint, tfloat]
|
dst_types = [tint, tuint, tfloat, tbool]
|
||||||
|
|
||||||
for dst_t in dst_types:
|
for dst_t in dst_types:
|
||||||
for bit_size in type_sizes(dst_t):
|
for bit_size in type_sizes(dst_t):
|
||||||
|
@ -211,15 +218,9 @@ for src_t in [tint, tuint, tfloat]:
|
||||||
bit_size, rnd_mode),
|
bit_size, rnd_mode),
|
||||||
dst_t + str(bit_size), src_t, "src0")
|
dst_t + str(bit_size), src_t, "src0")
|
||||||
else:
|
else:
|
||||||
|
conv_expr = "src0 != 0" if dst_t == tbool else "src0"
|
||||||
unop_convert("{0}2{1}{2}".format(src_t[0], dst_t[0], bit_size),
|
unop_convert("{0}2{1}{2}".format(src_t[0], dst_t[0], bit_size),
|
||||||
dst_t + str(bit_size), src_t, "src0")
|
dst_t + str(bit_size), src_t, conv_expr)
|
||||||
|
|
||||||
# We'll hand-code the to/from bool conversion opcodes. Because bool doesn't
|
|
||||||
# have multiple bit-sizes, we can always infer the size from the other type.
|
|
||||||
unop_convert("f2b", tbool32, tfloat, "src0 != 0.0")
|
|
||||||
unop_convert("i2b", tbool32, tint, "src0 != 0")
|
|
||||||
unop_convert("b2f", tfloat, tbool32, "src0 ? 1.0 : 0.0")
|
|
||||||
unop_convert("b2i", tint, tbool32, "src0 ? 1 : 0")
|
|
||||||
|
|
||||||
|
|
||||||
# Unary floating-point rounding operations.
|
# Unary floating-point rounding operations.
|
||||||
|
|
|
@ -41,6 +41,8 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
|
||||||
|
|
||||||
if (src == dst && src_base == nir_type_float) {
|
if (src == dst && src_base == nir_type_float) {
|
||||||
return nir_op_fmov;
|
return nir_op_fmov;
|
||||||
|
} else if (src == dst && src_base == nir_type_bool) {
|
||||||
|
return nir_op_imov;
|
||||||
} else if ((src_base == nir_type_int || src_base == nir_type_uint) &&
|
} else if ((src_base == nir_type_int || src_base == nir_type_uint) &&
|
||||||
(dst_base == nir_type_int || dst_base == nir_type_uint) &&
|
(dst_base == nir_type_int || dst_base == nir_type_uint) &&
|
||||||
src_bit_size == dst_bit_size) {
|
src_bit_size == dst_bit_size) {
|
||||||
|
@ -51,10 +53,10 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (src_base) {
|
switch (src_base) {
|
||||||
% for src_t in ['int', 'uint', 'float']:
|
% for src_t in ['int', 'uint', 'float', 'bool']:
|
||||||
case nir_type_${src_t}:
|
case nir_type_${src_t}:
|
||||||
switch (dst_base) {
|
switch (dst_base) {
|
||||||
% for dst_t in ['int', 'uint', 'float']:
|
% for dst_t in ['int', 'uint', 'float', 'bool']:
|
||||||
case nir_type_${dst_t}:
|
case nir_type_${dst_t}:
|
||||||
% if src_t in ['int', 'uint'] and dst_t in ['int', 'uint']:
|
% if src_t in ['int', 'uint'] and dst_t in ['int', 'uint']:
|
||||||
% if dst_t == 'int':
|
% if dst_t == 'int':
|
||||||
|
@ -62,6 +64,14 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
|
||||||
% else:
|
% else:
|
||||||
<% dst_t = src_t %>
|
<% dst_t = src_t %>
|
||||||
% endif
|
% endif
|
||||||
|
% elif src_t == 'bool' and dst_t in ['int', 'uint', 'bool']:
|
||||||
|
% if dst_t == 'int':
|
||||||
|
<% continue %>
|
||||||
|
% else:
|
||||||
|
<% dst_t = 'int' %>
|
||||||
|
% endif
|
||||||
|
% elif src_t == 'uint' and dst_t == 'bool':
|
||||||
|
<% src_t = 'int' %>
|
||||||
% endif
|
% endif
|
||||||
switch (dst_bit_size) {
|
switch (dst_bit_size) {
|
||||||
% for dst_bits in type_sizes(dst_t):
|
% for dst_bits in type_sizes(dst_t):
|
||||||
|
@ -85,26 +95,10 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
|
||||||
unreachable("Invalid nir alu bit size");
|
unreachable("Invalid nir alu bit size");
|
||||||
}
|
}
|
||||||
% endfor
|
% endfor
|
||||||
case nir_type_bool:
|
|
||||||
% if src_t == 'float':
|
|
||||||
return nir_op_f2b;
|
|
||||||
% else:
|
|
||||||
return nir_op_i2b;
|
|
||||||
% endif
|
|
||||||
default:
|
default:
|
||||||
unreachable("Invalid nir alu base type");
|
unreachable("Invalid nir alu base type");
|
||||||
}
|
}
|
||||||
% endfor
|
% endfor
|
||||||
case nir_type_bool:
|
|
||||||
switch (dst_base) {
|
|
||||||
case nir_type_int:
|
|
||||||
case nir_type_uint:
|
|
||||||
return nir_op_b2i;
|
|
||||||
case nir_type_float:
|
|
||||||
return nir_op_b2f;
|
|
||||||
default:
|
|
||||||
unreachable("Invalid nir alu base type");
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
unreachable("Invalid nir alu base type");
|
unreachable("Invalid nir alu base type");
|
||||||
}
|
}
|
||||||
|
|
|
@ -440,7 +440,7 @@ optimizations = [
|
||||||
(('fsat', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32'))), ('b2f', ('ior', a, b))),
|
(('fsat', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32'))), ('b2f', ('ior', a, b))),
|
||||||
(('iand', 'a@bool', 1.0), ('b2f', a), '!options->lower_b2f'),
|
(('iand', 'a@bool', 1.0), ('b2f', a), '!options->lower_b2f'),
|
||||||
# True/False are ~0 and 0 in NIR. b2i of True is 1, and -1 is ~0 (True).
|
# True/False are ~0 and 0 in NIR. b2i of True is 1, and -1 is ~0 (True).
|
||||||
(('ineg', ('b2i@32', 'a@32')), a),
|
(('ineg', ('b2i32', 'a@32')), a),
|
||||||
(('flt', ('fneg', ('b2f', 'a@32')), 0), a), # Generated by TGSI KILL_IF.
|
(('flt', ('fneg', ('b2f', 'a@32')), 0), a), # Generated by TGSI KILL_IF.
|
||||||
(('flt', ('fsub', 0.0, ('b2f', 'a@32')), 0), a), # Generated by TGSI KILL_IF.
|
(('flt', ('fsub', 0.0, ('b2f', 'a@32')), 0), a), # Generated by TGSI KILL_IF.
|
||||||
# Comparison with the same args. Note that these are not done for
|
# Comparison with the same args. Note that these are not done for
|
||||||
|
@ -532,15 +532,15 @@ optimizations = [
|
||||||
(('fcsel', a, b, b), b),
|
(('fcsel', a, b, b), b),
|
||||||
|
|
||||||
# Conversions
|
# Conversions
|
||||||
(('i2b', ('b2i', 'a@32')), a),
|
(('i2b32', ('b2i', 'a@32')), a),
|
||||||
(('i2b', 'a@bool'), a),
|
(('i2b32', 'a@bool'), a),
|
||||||
(('f2i', ('ftrunc', a)), ('f2i', a)),
|
(('f2i', ('ftrunc', a)), ('f2i', a)),
|
||||||
(('f2u', ('ftrunc', a)), ('f2u', a)),
|
(('f2u', ('ftrunc', a)), ('f2u', a)),
|
||||||
(('i2b', ('ineg', a)), ('i2b', a)),
|
(('i2b', ('ineg', a)), ('i2b', a)),
|
||||||
(('i2b', ('iabs', a)), ('i2b', a)),
|
(('i2b', ('iabs', a)), ('i2b', a)),
|
||||||
(('fabs', ('b2f', a)), ('b2f', a)),
|
(('fabs', ('b2f', a)), ('b2f', a)),
|
||||||
(('iabs', ('b2i', a)), ('b2i', a)),
|
(('iabs', ('b2i', a)), ('b2i', a)),
|
||||||
(('inot', ('f2b', a)), ('feq', a, 0.0)),
|
(('inot', ('f2b32', a)), ('feq', a, 0.0)),
|
||||||
|
|
||||||
# Ironically, mark these as imprecise because removing the conversions may
|
# Ironically, mark these as imprecise because removing the conversions may
|
||||||
# preserve more precision than doing the conversions (e.g.,
|
# preserve more precision than doing the conversions (e.g.,
|
||||||
|
@ -754,8 +754,8 @@ for left, right in itertools.combinations_with_replacement(invert.keys(), 2):
|
||||||
('ior', (invert[left], a, b), (invert[right], c, d))))
|
('ior', (invert[left], a, b), (invert[right], c, d))))
|
||||||
|
|
||||||
# Optimize x2yN(b2x(x)) -> b2y
|
# Optimize x2yN(b2x(x)) -> b2y
|
||||||
optimizations.append((('f2b', ('b2f', 'a@32')), a))
|
optimizations.append((('f2b32', ('b2f', 'a@32')), a))
|
||||||
optimizations.append((('i2b', ('b2i', 'a@32')), a))
|
optimizations.append((('i2b32', ('b2i', 'a@32')), a))
|
||||||
for x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']):
|
for x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']):
|
||||||
if x != 'f' and y != 'f' and x != y:
|
if x != 'f' and y != 'f' and x != y:
|
||||||
continue
|
continue
|
||||||
|
@ -916,7 +916,7 @@ late_optimizations = [
|
||||||
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
|
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
|
||||||
|
|
||||||
# Lowered for backends without a dedicated b2f instruction
|
# Lowered for backends without a dedicated b2f instruction
|
||||||
(('b2f@32', 'a@32'), ('iand', a, 1.0), 'options->lower_b2f'),
|
(('b2f32', 'a@32'), ('iand', a, 1.0), 'options->lower_b2f'),
|
||||||
]
|
]
|
||||||
|
|
||||||
print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
|
print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
|
||||||
|
|
|
@ -509,7 +509,7 @@ can_propagate_through_alu(nir_src *src)
|
||||||
case nir_op_ior:
|
case nir_op_ior:
|
||||||
case nir_op_iand:
|
case nir_op_iand:
|
||||||
case nir_op_inot:
|
case nir_op_inot:
|
||||||
case nir_op_b2i:
|
case nir_op_b2i32:
|
||||||
return true;
|
return true;
|
||||||
case nir_op_bcsel:
|
case nir_op_bcsel:
|
||||||
return src == &alu->src[0].src;
|
return src == &alu->src[0].src;
|
||||||
|
|
|
@ -108,6 +108,10 @@ nir_op_matches_search_op(nir_op nop, uint16_t sop)
|
||||||
nop == nir_op_##op##32 || \
|
nop == nir_op_##op##32 || \
|
||||||
nop == nir_op_##op##64;
|
nop == nir_op_##op##64;
|
||||||
|
|
||||||
|
#define MATCH_BCONV_CASE(op) \
|
||||||
|
case nir_search_op_##op: \
|
||||||
|
return nop == nir_op_##op##32;
|
||||||
|
|
||||||
switch (sop) {
|
switch (sop) {
|
||||||
MATCH_FCONV_CASE(i2f)
|
MATCH_FCONV_CASE(i2f)
|
||||||
MATCH_FCONV_CASE(u2f)
|
MATCH_FCONV_CASE(u2f)
|
||||||
|
@ -116,6 +120,10 @@ nir_op_matches_search_op(nir_op nop, uint16_t sop)
|
||||||
MATCH_ICONV_CASE(f2i)
|
MATCH_ICONV_CASE(f2i)
|
||||||
MATCH_ICONV_CASE(u2u)
|
MATCH_ICONV_CASE(u2u)
|
||||||
MATCH_ICONV_CASE(i2i)
|
MATCH_ICONV_CASE(i2i)
|
||||||
|
MATCH_FCONV_CASE(b2f)
|
||||||
|
MATCH_ICONV_CASE(b2i)
|
||||||
|
MATCH_BCONV_CASE(i2b)
|
||||||
|
MATCH_BCONV_CASE(f2b)
|
||||||
default:
|
default:
|
||||||
unreachable("Invalid nir_search_op");
|
unreachable("Invalid nir_search_op");
|
||||||
}
|
}
|
||||||
|
@ -149,6 +157,13 @@ nir_op_for_search_op(uint16_t sop, unsigned bit_size)
|
||||||
default: unreachable("Invalid bit size"); \
|
default: unreachable("Invalid bit size"); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define RET_BCONV_CASE(op) \
|
||||||
|
case nir_search_op_##op: \
|
||||||
|
switch (bit_size) { \
|
||||||
|
case 32: return nir_op_##op##32; \
|
||||||
|
default: unreachable("Invalid bit size"); \
|
||||||
|
}
|
||||||
|
|
||||||
switch (sop) {
|
switch (sop) {
|
||||||
RET_FCONV_CASE(i2f)
|
RET_FCONV_CASE(i2f)
|
||||||
RET_FCONV_CASE(u2f)
|
RET_FCONV_CASE(u2f)
|
||||||
|
@ -157,6 +172,10 @@ nir_op_for_search_op(uint16_t sop, unsigned bit_size)
|
||||||
RET_ICONV_CASE(f2i)
|
RET_ICONV_CASE(f2i)
|
||||||
RET_ICONV_CASE(u2u)
|
RET_ICONV_CASE(u2u)
|
||||||
RET_ICONV_CASE(i2i)
|
RET_ICONV_CASE(i2i)
|
||||||
|
RET_FCONV_CASE(b2f)
|
||||||
|
RET_ICONV_CASE(b2i)
|
||||||
|
RET_BCONV_CASE(i2b)
|
||||||
|
RET_BCONV_CASE(f2b)
|
||||||
default:
|
default:
|
||||||
unreachable("Invalid nir_search_op");
|
unreachable("Invalid nir_search_op");
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,6 +117,10 @@ enum nir_search_op {
|
||||||
nir_search_op_f2i,
|
nir_search_op_f2i,
|
||||||
nir_search_op_u2u,
|
nir_search_op_u2u,
|
||||||
nir_search_op_i2i,
|
nir_search_op_i2i,
|
||||||
|
nir_search_op_b2f,
|
||||||
|
nir_search_op_b2i,
|
||||||
|
nir_search_op_i2b,
|
||||||
|
nir_search_op_f2b,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
|
@ -274,7 +274,7 @@ build_atan(nir_builder *b, nir_ssa_def *y_over_x)
|
||||||
/* range-reduction fixup */
|
/* range-reduction fixup */
|
||||||
tmp = nir_fadd(b, tmp,
|
tmp = nir_fadd(b, tmp,
|
||||||
nir_fmul(b,
|
nir_fmul(b,
|
||||||
nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
|
nir_b2f32(b, nir_flt(b, one, abs_y_over_x)),
|
||||||
nir_fadd(b, nir_fmul(b, tmp,
|
nir_fadd(b, nir_fmul(b, tmp,
|
||||||
nir_imm_float(b, -2.0f)),
|
nir_imm_float(b, -2.0f)),
|
||||||
nir_imm_float(b, M_PI_2f))));
|
nir_imm_float(b, M_PI_2f))));
|
||||||
|
@ -346,7 +346,7 @@ build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
|
||||||
/* Calculate the arctangent and fix up the result if we had flipped the
|
/* Calculate the arctangent and fix up the result if we had flipped the
|
||||||
* coordinate system.
|
* coordinate system.
|
||||||
*/
|
*/
|
||||||
nir_ssa_def *arc = nir_fadd(b, nir_fmul(b, nir_b2f(b, flip),
|
nir_ssa_def *arc = nir_fadd(b, nir_fmul(b, nir_b2f32(b, flip),
|
||||||
nir_imm_float(b, M_PI_2f)),
|
nir_imm_float(b, M_PI_2f)),
|
||||||
build_atan(b, tan));
|
build_atan(b, tan));
|
||||||
|
|
||||||
|
|
|
@ -1108,18 +1108,21 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
|
||||||
case nir_op_u2u8:
|
case nir_op_u2u8:
|
||||||
dst[0] = create_cov(ctx, src[0], bs[0], alu->op);
|
dst[0] = create_cov(ctx, src[0], bs[0], alu->op);
|
||||||
break;
|
break;
|
||||||
case nir_op_f2b:
|
case nir_op_f2b32:
|
||||||
dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0);
|
dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0);
|
||||||
dst[0]->cat2.condition = IR3_COND_NE;
|
dst[0]->cat2.condition = IR3_COND_NE;
|
||||||
dst[0] = ir3_n2b(b, dst[0]);
|
dst[0] = ir3_n2b(b, dst[0]);
|
||||||
break;
|
break;
|
||||||
case nir_op_b2f:
|
case nir_op_b2f16:
|
||||||
|
case nir_op_b2f32:
|
||||||
dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32);
|
dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32);
|
||||||
break;
|
break;
|
||||||
case nir_op_b2i:
|
case nir_op_b2i8:
|
||||||
|
case nir_op_b2i16:
|
||||||
|
case nir_op_b2i32:
|
||||||
dst[0] = ir3_b2n(b, src[0]);
|
dst[0] = ir3_b2n(b, src[0]);
|
||||||
break;
|
break;
|
||||||
case nir_op_i2b:
|
case nir_op_i2b32:
|
||||||
dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
|
dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
|
||||||
dst[0]->cat2.condition = IR3_COND_NE;
|
dst[0]->cat2.condition = IR3_COND_NE;
|
||||||
dst[0] = ir3_n2b(b, dst[0]);
|
dst[0] = ir3_n2b(b, dst[0]);
|
||||||
|
|
|
@ -1208,14 +1208,14 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
|
||||||
case nir_op_u2f32:
|
case nir_op_u2f32:
|
||||||
result = qir_ITOF(c, src[0]);
|
result = qir_ITOF(c, src[0]);
|
||||||
break;
|
break;
|
||||||
case nir_op_b2f:
|
case nir_op_b2f32:
|
||||||
result = qir_AND(c, src[0], qir_uniform_f(c, 1.0));
|
result = qir_AND(c, src[0], qir_uniform_f(c, 1.0));
|
||||||
break;
|
break;
|
||||||
case nir_op_b2i:
|
case nir_op_b2i32:
|
||||||
result = qir_AND(c, src[0], qir_uniform_ui(c, 1));
|
result = qir_AND(c, src[0], qir_uniform_ui(c, 1));
|
||||||
break;
|
break;
|
||||||
case nir_op_i2b:
|
case nir_op_i2b32:
|
||||||
case nir_op_f2b:
|
case nir_op_f2b32:
|
||||||
qir_SF(c, src[0]);
|
qir_SF(c, src[0]);
|
||||||
result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC,
|
result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC,
|
||||||
qir_uniform_ui(c, ~0),
|
qir_uniform_ui(c, ~0),
|
||||||
|
|
|
@ -788,8 +788,13 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||||
inst->saturate = instr->dest.saturate;
|
inst->saturate = instr->dest.saturate;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_b2i:
|
case nir_op_b2i8:
|
||||||
case nir_op_b2f:
|
case nir_op_b2i16:
|
||||||
|
case nir_op_b2i32:
|
||||||
|
case nir_op_b2i64:
|
||||||
|
case nir_op_b2f16:
|
||||||
|
case nir_op_b2f32:
|
||||||
|
case nir_op_b2f64:
|
||||||
op[0].type = BRW_REGISTER_TYPE_D;
|
op[0].type = BRW_REGISTER_TYPE_D;
|
||||||
op[0].negate = !op[0].negate;
|
op[0].negate = !op[0].negate;
|
||||||
/* fallthrough */
|
/* fallthrough */
|
||||||
|
@ -1213,15 +1218,15 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||||
inst->saturate = instr->dest.saturate;
|
inst->saturate = instr->dest.saturate;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_i2b:
|
case nir_op_i2b32:
|
||||||
case nir_op_f2b: {
|
case nir_op_f2b32: {
|
||||||
uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
|
uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
|
||||||
if (bit_size == 64) {
|
if (bit_size == 64) {
|
||||||
/* two-argument instructions can't take 64-bit immediates */
|
/* two-argument instructions can't take 64-bit immediates */
|
||||||
fs_reg zero;
|
fs_reg zero;
|
||||||
fs_reg tmp;
|
fs_reg tmp;
|
||||||
|
|
||||||
if (instr->op == nir_op_f2b) {
|
if (instr->op == nir_op_f2b32) {
|
||||||
zero = vgrf(glsl_type::double_type);
|
zero = vgrf(glsl_type::double_type);
|
||||||
tmp = vgrf(glsl_type::double_type);
|
tmp = vgrf(glsl_type::double_type);
|
||||||
bld.MOV(zero, setup_imm_df(bld, 0.0));
|
bld.MOV(zero, setup_imm_df(bld, 0.0));
|
||||||
|
@ -1240,10 +1245,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||||
} else {
|
} else {
|
||||||
fs_reg zero;
|
fs_reg zero;
|
||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
zero = instr->op == nir_op_f2b ? brw_imm_f(0.0f) : brw_imm_d(0);
|
zero = instr->op == nir_op_f2b32 ? brw_imm_f(0.0f) : brw_imm_d(0);
|
||||||
} else {
|
} else {
|
||||||
assert(bit_size == 16);
|
assert(bit_size == 16);
|
||||||
zero = instr->op == nir_op_f2b ?
|
zero = instr->op == nir_op_f2b32 ?
|
||||||
retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0);
|
retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0);
|
||||||
}
|
}
|
||||||
bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ);
|
bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ);
|
||||||
|
|
|
@ -1440,8 +1440,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
||||||
emit(AND(dst, op[0], op[1]));
|
emit(AND(dst, op[0], op[1]));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_b2i:
|
case nir_op_b2i32:
|
||||||
case nir_op_b2f:
|
case nir_op_b2f32:
|
||||||
|
case nir_op_b2f64:
|
||||||
if (nir_dest_bit_size(instr->dest.dest) > 32) {
|
if (nir_dest_bit_size(instr->dest.dest) > 32) {
|
||||||
assert(dst.type == BRW_REGISTER_TYPE_DF);
|
assert(dst.type == BRW_REGISTER_TYPE_DF);
|
||||||
emit_conversion_to_double(dst, negate(op[0]), false);
|
emit_conversion_to_double(dst, negate(op[0]), false);
|
||||||
|
@ -1450,7 +1451,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_f2b:
|
case nir_op_f2b32:
|
||||||
if (nir_src_bit_size(instr->src[0].src) == 64) {
|
if (nir_src_bit_size(instr->src[0].src) == 64) {
|
||||||
/* We use a MOV with conditional_mod to check if the provided value is
|
/* We use a MOV with conditional_mod to check if the provided value is
|
||||||
* 0.0. We want this to flush denormalized numbers to zero, so we set a
|
* 0.0. We want this to flush denormalized numbers to zero, so we set a
|
||||||
|
@ -1471,7 +1472,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_i2b:
|
case nir_op_i2b32:
|
||||||
emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
|
emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -393,7 +393,7 @@ static void
|
||||||
ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
|
ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
|
||||||
{
|
{
|
||||||
if (b->shader->options->native_integers) {
|
if (b->shader->options->native_integers) {
|
||||||
ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
|
ptn_move_dest(b, dest, nir_b2f32(b, nir_flt(b, src[0], src[1])));
|
||||||
} else {
|
} else {
|
||||||
ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
|
ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
|
||||||
}
|
}
|
||||||
|
@ -406,7 +406,7 @@ static void
|
||||||
ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
|
ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
|
||||||
{
|
{
|
||||||
if (b->shader->options->native_integers) {
|
if (b->shader->options->native_integers) {
|
||||||
ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
|
ptn_move_dest(b, dest, nir_b2f32(b, nir_fge(b, src[0], src[1])));
|
||||||
} else {
|
} else {
|
||||||
ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
|
ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue