nir: Rework conversion opcodes

The NIR story on conversion opcodes is a mess.  We've had way too many
of them, naming is inconsistent, and which ones have explicit sizes was
sort-of random.  This commit re-organizes things and makes them all
consistent:

 - All non-bool conversion opcodes now have the explicit size in the
   destination and are named <src_type>2<dst_type><size>.

 - Integer <-> integer conversion opcodes now only come in i2i and u2u
   forms (i2u and u2i have been removed) since the only difference
   between the different integer conversions is whether or not they
   sign-extend when up-converting.

 - Boolean conversion opcodes all have the explicit size on the bool and
   are named <src_type>2<dst_type>.

Making things consistent also allows nir_type_conversion_op to be moved
to nir_opcodes.c and auto-generated using mako.  This will make adding
int8, int16, and float16 versions much easier when the time comes.

Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
Jason Ekstrand 2017-03-07 19:54:37 -08:00
parent 7107b32155
commit 762a6333f2
22 changed files with 217 additions and 307 deletions

View File

@ -1449,41 +1449,37 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
src[i] = to_integer(ctx, src[i]);
result = ac_build_gather_values(&ctx->ac, src, num_components);
break;
case nir_op_d2i:
case nir_op_f2i:
case nir_op_f2i32:
case nir_op_f2i64:
src[0] = to_float(ctx, src[0]);
result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, "");
break;
case nir_op_d2u:
case nir_op_f2u:
case nir_op_f2u32:
case nir_op_f2u64:
src[0] = to_float(ctx, src[0]);
result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, "");
break;
case nir_op_i2d:
case nir_op_i2f:
case nir_op_i2f32:
case nir_op_i2f64:
result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), "");
break;
case nir_op_u2d:
case nir_op_u2f:
case nir_op_u2f32:
case nir_op_u2f64:
result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), "");
break;
case nir_op_f2d:
case nir_op_f2f64:
result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(ctx, def_type), "");
break;
case nir_op_d2f:
case nir_op_f2f32:
result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(ctx, def_type), "");
break;
case nir_op_u2u32:
case nir_op_u2u64:
case nir_op_u2i32:
case nir_op_u2i64:
if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type))
result = LLVMBuildZExt(ctx->builder, src[0], def_type, "");
else
result = LLVMBuildTrunc(ctx->builder, src[0], def_type, "");
break;
case nir_op_i2u32:
case nir_op_i2u64:
case nir_op_i2i32:
case nir_op_i2i64:
if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type))

View File

@ -586,7 +586,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
@ -615,7 +615,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;
nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
@ -644,7 +644,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;
nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);

View File

@ -1456,10 +1456,10 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
case ir_unop_i2f:
result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
result = supports_ints ? nir_i2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
break;
case ir_unop_u2f:
result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
break;
case ir_unop_b2f:
result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
@ -1502,6 +1502,10 @@ nir_visitor::visit(ir_expression *ir)
nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type);
result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type),
srcs[0], NULL, NULL, NULL);
/* b2i and b2f don't have fixed bit-size versions so the builder will
* just assume 32 and we have to fix it up here.
*/
result->bit_size = nir_alu_type_get_type_size(dst_type);
break;
}

View File

@ -1958,125 +1958,3 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
unreachable("intrinsic doesn't produce a system value");
}
}
/**
 * Select the ALU opcode that converts a value of type src into type dst.
 *
 * Both types are split into a base type and a bit size.  When the two bit
 * sizes match, int/uint -> int/uint and bool -> bool yield nir_op_imov and
 * float -> float yields nir_op_fmov; every other same-size pairing breaks
 * out of the first switch and is handled by the general selection below.
 * Combinations with no matching case end in unreachable().
 */
nir_op
nir_type_conversion_op(nir_alu_type src, nir_alu_type dst)
{
nir_alu_type src_base_type = (nir_alu_type) nir_alu_type_get_base_type(src);
nir_alu_type dst_base_type = (nir_alu_type) nir_alu_type_get_base_type(dst);
unsigned src_bitsize = nir_alu_type_get_type_size(src);
unsigned dst_bitsize = nir_alu_type_get_type_size(dst);
/* Same bit size on both ends: check for conversions that are plain moves. */
if (src_bitsize == dst_bitsize) {
switch (src_base_type) {
case nir_type_int:
case nir_type_uint:
if (dst_base_type == nir_type_uint || dst_base_type == nir_type_int)
return nir_op_imov;
break;
case nir_type_float:
if (dst_base_type == nir_type_float)
return nir_op_fmov;
break;
case nir_type_bool:
if (dst_base_type == nir_type_bool)
return nir_op_imov;
break;
default:
unreachable("Invalid conversion");
}
}
/* Not a plain move: pick the opcode from the base types and bit sizes. */
switch (src_base_type) {
case nir_type_int:
switch (dst_base_type) {
case nir_type_int:
assert(src_bitsize != dst_bitsize);
return (dst_bitsize == 32) ? nir_op_i2i32 : nir_op_i2i64;
case nir_type_uint:
assert(src_bitsize != dst_bitsize);
return (dst_bitsize == 32) ? nir_op_i2u32 : nir_op_i2u64;
case nir_type_float:
switch (src_bitsize) {
case 32:
return (dst_bitsize == 32) ? nir_op_i2f : nir_op_i2d;
case 64:
return (dst_bitsize == 32) ? nir_op_i642f : nir_op_i642d;
default:
unreachable("Invalid conversion");
}
case nir_type_bool:
/* To-bool opcodes are selected by the source bit size. */
return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b;
default:
unreachable("Invalid conversion");
}
case nir_type_uint:
switch (dst_base_type) {
case nir_type_int:
assert(src_bitsize != dst_bitsize);
return (dst_bitsize == 32) ? nir_op_u2i32 : nir_op_u2i64;
case nir_type_uint:
assert(src_bitsize != dst_bitsize);
return (dst_bitsize == 32) ? nir_op_u2u32 : nir_op_u2u64;
case nir_type_float:
switch (src_bitsize) {
case 32:
return (dst_bitsize == 32) ? nir_op_u2f : nir_op_u2d;
case 64:
return (dst_bitsize == 32) ? nir_op_u642f : nir_op_u642d;
default:
unreachable("Invalid conversion");
}
case nir_type_bool:
/* uint -> bool returns the same i2b/i642b opcodes as int -> bool. */
return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b;
default:
unreachable("Invalid conversion");
}
case nir_type_float:
switch (dst_base_type) {
case nir_type_int:
/* A destination that is not 32 bits maps to nir_op_f2i64 for both
 * source sizes.
 */
switch (src_bitsize) {
case 32:
return (dst_bitsize == 32) ? nir_op_f2i : nir_op_f2i64;
case 64:
return (dst_bitsize == 32) ? nir_op_d2i : nir_op_f2i64;
default:
unreachable("Invalid conversion");
}
case nir_type_uint:
/* Likewise, nir_op_f2u64 is returned for both source sizes. */
switch (src_bitsize) {
case 32:
return (dst_bitsize == 32) ? nir_op_f2u : nir_op_f2u64;
case 64:
return (dst_bitsize == 32) ? nir_op_d2u : nir_op_f2u64;
default:
unreachable("Invalid conversion");
}
case nir_type_float:
assert(src_bitsize != dst_bitsize);
return (dst_bitsize == 32) ? nir_op_d2f : nir_op_f2d;
case nir_type_bool:
return (src_bitsize == 32) ? nir_op_f2b : nir_op_d2b;
default:
unreachable("Invalid conversion");
}
case nir_type_bool:
switch (dst_base_type) {
case nir_type_int:
case nir_type_uint:
return (dst_bitsize == 32) ? nir_op_b2i : nir_op_b2i64;
case nir_type_float:
/* GLSL just emits f2d(b2f(x)) for b2d */
assert(dst_bitsize == 32);
return nir_op_b2f;
default:
unreachable("Invalid conversion");
}
default:
unreachable("Invalid conversion");
}
}

View File

@ -328,6 +328,10 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
}
}
/* When in doubt, assume 32. */
if (bit_size == 0)
bit_size = 32;
/* Make sure we don't swizzle from outside of our source vector (like if a
* scalar value was passed into a multiply with a vector).
*/

View File

@ -116,7 +116,7 @@ lower_rcp(nir_builder *b, nir_ssa_def *src)
/* cast to float, do an rcp, and then cast back to get an approximate
* result
*/
nir_ssa_def *ra = nir_f2d(b, nir_frcp(b, nir_d2f(b, src_norm)));
nir_ssa_def *ra = nir_f2f64(b, nir_frcp(b, nir_f2f32(b, src_norm)));
/* Fixup the exponent of the result - note that we check if this is too
* small below.
@ -180,7 +180,7 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool sqrt)
nir_iadd(b, nir_imm_int(b, 1023),
even));
nir_ssa_def *ra = nir_f2d(b, nir_frsq(b, nir_d2f(b, src_norm)));
nir_ssa_def *ra = nir_f2f64(b, nir_frsq(b, nir_f2f32(b, src_norm)));
nir_ssa_def *new_exp = nir_isub(b, get_exponent(b, ra), half);
ra = set_exponent(b, ra, new_exp);

View File

@ -56,15 +56,15 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
denom = nir_ssa_for_alu_src(bld, alu, 1);
if (is_signed) {
af = nir_i2f(bld, numer);
bf = nir_i2f(bld, denom);
af = nir_i2f32(bld, numer);
bf = nir_i2f32(bld, denom);
af = nir_fabs(bld, af);
bf = nir_fabs(bld, bf);
a = nir_iabs(bld, numer);
b = nir_iabs(bld, denom);
} else {
af = nir_u2f(bld, numer);
bf = nir_u2f(bld, denom);
af = nir_u2f32(bld, numer);
bf = nir_u2f32(bld, denom);
a = numer;
b = denom;
}
@ -75,17 +75,17 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
q = nir_fmul(bld, af, bf);
if (is_signed) {
q = nir_f2i(bld, q);
q = nir_f2i32(bld, q);
} else {
q = nir_f2u(bld, q);
q = nir_f2u32(bld, q);
}
/* get error of first result: */
r = nir_imul(bld, q, b);
r = nir_isub(bld, a, r);
r = nir_u2f(bld, r);
r = nir_u2f32(bld, r);
r = nir_fmul(bld, r, bf);
r = nir_f2u(bld, r);
r = nir_f2u32(bld, r);
/* add quotients: */
q = nir_iadd(bld, q, r);

View File

@ -121,7 +121,7 @@ lower_offset(nir_builder *b, nir_tex_instr *tex)
nir_ssa_def *offset_coord;
if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT);
offset_coord = nir_fadd(b, coord, nir_i2f(b, offset));
offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
} else {
offset_coord = nir_iadd(b, coord, offset);
}
@ -176,7 +176,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
nir_tex_instr_dest_size(txs), 32, NULL);
nir_builder_instr_insert(b, &txs->instr);
return nir_i2f(b, &txs->dest.ssa);
return nir_i2f32(b, &txs->dest.ssa);
}
static void

View File

@ -165,42 +165,26 @@ unop("frsq", tfloat, "bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "bit_size == 64 ? sqrt(src0) : sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion.
unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion.
unop_convert("i2i32", tint32, tint, "src0") # General int (int8_t, int64_t, etc.) to int32_t conversion
unop_convert("u2i32", tint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int32_t conversion
unop_convert("i2u32", tuint32, tint, "src0") # General int (int8_t, int64_t, etc.) to uint32_t conversion
unop_convert("u2u32", tuint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to uint32_t conversion
unop_convert("i2i64", tint64, tint, "src0") # General int (int8_t, int32_t, etc.) to int64_t conversion
unop_convert("u2i64", tint64, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int64_t conversion
unop_convert("f2i64", tint64, tfloat, "src0") # General float (float or double) to int64_t conversion
unop_convert("i2u64", tuint64, tint, "src0") # General int (int8_t, int64_t, etc.) to uint64_t conversion
unop_convert("u2u64", tuint64, tuint, "src0") # General uint (uint8_t, uint32_t, etc.) to uint64_t conversion
unop_convert("f2u64", tuint64, tfloat, "src0") # General float (float or double) to uint64_t conversion
unop_convert("i642f", tfloat32, tint64, "src0") # int64_t-to-float conversion.
unop_convert("i642b", tbool, tint64, "src0") # int64_t-to-bool conversion.
unop_convert("i642d", tfloat64, tint64, "src0") # int64_t-to-double conversion.
unop_convert("u642f", tfloat32, tuint64, "src0") # uint64_t-to-float conversion.
unop_convert("u642d", tfloat64, tuint64, "src0") # uint64_t-to-double conversion.
# Float-to-boolean conversion
unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
unop_convert("d2b", tbool, tfloat64, "src0 != 0.0")
# Boolean-to-float conversion
unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
# Generate all of the numeric conversion opcodes
for src_t in [tint, tuint, tfloat]:
if src_t in (tint, tuint):
dst_types = [tfloat, src_t]
elif src_t == tfloat:
dst_types = [tint, tuint, tfloat]
for dst_t in dst_types:
for bit_size in [32, 64]:
unop_convert("{}2{}{}".format(src_t[0], dst_t[0], bit_size),
dst_t + str(bit_size), src_t, "src0")
# We'll hand-code the to/from bool conversion opcodes. Because bool doesn't
# have multiple bit-sizes, we can always infer the size from the other type.
unop_convert("f2b", tbool, tfloat, "src0 != 0.0")
unop_convert("i2b", tbool, tint, "src0 != 0")
unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("b2i64", tint64, tbool, "src0 ? 1 : 0") # Boolean-to-int64_t conversion.
unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion.
# double-to-float conversion
unop_convert("d2f", tfloat32, tfloat64, "src0") # Double to single precision
unop_convert("f2d", tfloat64, tfloat32, "src0") # Single to double precision
unop_convert("b2f", tfloat, tbool, "src0 ? 1.0 : 0.0")
unop_convert("b2i", tint, tbool, "src0 ? 1 : 0")
# Unary floating-point rounding operations.

View File

@ -29,6 +29,72 @@ from mako.template import Template
template = Template("""
#include "nir.h"
nir_op
nir_type_conversion_op(nir_alu_type src, nir_alu_type dst)
{
nir_alu_type src_base = (nir_alu_type) nir_alu_type_get_base_type(src);
nir_alu_type dst_base = (nir_alu_type) nir_alu_type_get_base_type(dst);
unsigned src_bit_size = nir_alu_type_get_type_size(src);
unsigned dst_bit_size = nir_alu_type_get_type_size(dst);
if (src == dst && src_base == nir_type_float) {
return nir_op_fmov;
} else if ((src_base == nir_type_int || src_base == nir_type_uint) &&
(dst_base == nir_type_int || dst_base == nir_type_uint) &&
src_bit_size == dst_bit_size) {
/* Integer <-> integer conversions with the same bit-size on both
* ends are just no-op moves.
*/
return nir_op_imov;
}
switch (src_base) {
% for src_t in ['int', 'uint', 'float']:
case nir_type_${src_t}:
switch (dst_base) {
% for dst_t in ['int', 'uint', 'float']:
case nir_type_${dst_t}:
% if src_t in ['int', 'uint'] and dst_t in ['int', 'uint']:
% if dst_t == 'int':
<% continue %>
% else:
<% dst_t = src_t %>
% endif
% endif
switch (dst_bit_size) {
% for dst_bits in [32, 64]:
case ${dst_bits}:
return ${'nir_op_{}2{}{}'.format(src_t[0], dst_t[0], dst_bits)};
% endfor
default:
unreachable("Invalid nir alu bit size");
}
% endfor
case nir_type_bool:
% if src_t == 'float':
return nir_op_f2b;
% else:
return nir_op_i2b;
% endif
default:
unreachable("Invalid nir alu base type");
}
% endfor
case nir_type_bool:
switch (dst_base) {
case nir_type_int:
case nir_type_uint:
return nir_op_b2i;
case nir_type_float:
return nir_op_b2f;
default:
unreachable("Invalid nir alu base type");
}
default:
unreachable("Invalid nir alu base type");
}
}
const nir_op_info nir_op_infos[nir_num_opcodes] = {
% for name, opcode in sorted(opcodes.iteritems()):
{

View File

@ -78,7 +78,7 @@ optimizations = [
(('ineg', ('ineg', a)), a),
(('fabs', ('fabs', a)), ('fabs', a)),
(('fabs', ('fneg', a)), ('fabs', a)),
(('fabs', ('u2f', a)), ('u2f', a)),
(('fabs', ('u2f32', a)), ('u2f32', a)),
(('iabs', ('iabs', a)), ('iabs', a)),
(('iabs', ('ineg', a)), ('iabs', a)),
(('~fadd', a, 0.0), a),
@ -212,7 +212,7 @@ optimizations = [
(('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
(('iand', 'a@bool', 1.0), ('b2f', a)),
# True/False are ~0 and 0 in NIR. b2i of True is 1, and -1 is ~0 (True).
(('ineg', ('b2i', a)), a),
(('ineg', ('b2i@32', a)), a),
(('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
(('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args. Note that these are not done for
@ -298,8 +298,8 @@ optimizations = [
# Conversions
(('i2b', ('b2i', a)), a),
(('f2i', ('ftrunc', a)), ('f2i', a)),
(('f2u', ('ftrunc', a)), ('f2u', a)),
(('f2i32', ('ftrunc', a)), ('f2i32', a)),
(('f2u32', ('ftrunc', a)), ('f2u32', a)),
(('i2b', ('ineg', a)), ('i2b', a)),
(('i2b', ('iabs', a)), ('i2b', a)),
(('fabs', ('b2f', a)), ('b2f', a)),
@ -387,49 +387,49 @@ optimizations = [
(('pack_unorm_2x16', 'v'),
('pack_uvec2_to_uint',
('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
'options->lower_pack_unorm_2x16'),
(('pack_unorm_4x8', 'v'),
('pack_uvec4_to_uint',
('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
'options->lower_pack_unorm_4x8'),
(('pack_snorm_2x16', 'v'),
('pack_uvec2_to_uint',
('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
'options->lower_pack_snorm_2x16'),
(('pack_snorm_4x8', 'v'),
('pack_uvec4_to_uint',
('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
'options->lower_pack_snorm_4x8'),
(('unpack_unorm_2x16', 'v'),
('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0),
('extract_u16', 'v', 1))),
('fdiv', ('u2f32', ('vec2', ('extract_u16', 'v', 0),
('extract_u16', 'v', 1))),
65535.0),
'options->lower_unpack_unorm_2x16'),
(('unpack_unorm_4x8', 'v'),
('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0),
('extract_u8', 'v', 1),
('extract_u8', 'v', 2),
('extract_u8', 'v', 3))),
('fdiv', ('u2f32', ('vec4', ('extract_u8', 'v', 0),
('extract_u8', 'v', 1),
('extract_u8', 'v', 2),
('extract_u8', 'v', 3))),
255.0),
'options->lower_unpack_unorm_4x8'),
(('unpack_snorm_2x16', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
('extract_i16', 'v', 1))),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec2', ('extract_i16', 'v', 0),
('extract_i16', 'v', 1))),
32767.0))),
'options->lower_unpack_snorm_2x16'),
(('unpack_snorm_4x8', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
('extract_i8', 'v', 1),
('extract_i8', 'v', 2),
('extract_i8', 'v', 3))),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec4', ('extract_i8', 'v', 0),
('extract_i8', 'v', 1),
('extract_i8', 'v', 2),
('extract_i8', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
]

View File

@ -866,7 +866,7 @@ ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
/* ARL: floor src[0], convert the floored float to an integer and hand the
 * result to ttn_move_dest() for writing into dest.  The op argument is not
 * used in this body.
 *
 * NOTE(review): the nir_f2i and nir_f2i32 statements look like the before and
 * after forms of the same line from the opcode rename; confirm only the
 * nir_f2i32 form is meant to remain.
 */
static void
ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
ttn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
ttn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
}
/* EXP - Approximate Exponential Base 2
@ -1587,7 +1587,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_POPA] = 0, /* XXX */
[TGSI_OPCODE_CEIL] = nir_op_fceil,
[TGSI_OPCODE_I2F] = nir_op_i2f,
[TGSI_OPCODE_I2F] = nir_op_i2f32,
[TGSI_OPCODE_NOT] = nir_op_inot,
[TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
[TGSI_OPCODE_SHL] = nir_op_ishl,
@ -1624,7 +1624,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_END] = 0,
[TGSI_OPCODE_F2I] = nir_op_f2i,
[TGSI_OPCODE_F2I] = nir_op_f2i32,
[TGSI_OPCODE_IDIV] = nir_op_idiv,
[TGSI_OPCODE_IMAX] = nir_op_imax,
[TGSI_OPCODE_IMIN] = nir_op_imin,
@ -1632,8 +1632,8 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_ISGE] = nir_op_ige,
[TGSI_OPCODE_ISHR] = nir_op_ishr,
[TGSI_OPCODE_ISLT] = nir_op_ilt,
[TGSI_OPCODE_F2U] = nir_op_f2u,
[TGSI_OPCODE_U2F] = nir_op_u2f,
[TGSI_OPCODE_F2U] = nir_op_f2u32,
[TGSI_OPCODE_U2F] = nir_op_u2f32,
[TGSI_OPCODE_UADD] = nir_op_iadd,
[TGSI_OPCODE_UDIV] = nir_op_udiv,
[TGSI_OPCODE_UMAD] = 0,

View File

@ -722,16 +722,16 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
}
switch (alu->op) {
case nir_op_f2i:
case nir_op_f2i32:
dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_S32);
break;
case nir_op_f2u:
case nir_op_f2u32:
dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_U32);
break;
case nir_op_i2f:
case nir_op_i2f32:
dst[0] = ir3_COV(b, src[0], TYPE_S32, TYPE_F32);
break;
case nir_op_u2f:
case nir_op_u2f32:
dst[0] = ir3_COV(b, src[0], TYPE_U32, TYPE_F32);
break;
case nir_op_imov:

View File

@ -637,7 +637,7 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
* coordinate, instead.
*/
nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
nir_ssa_def *bitmask = nir_isub(b,
nir_ishl(b,
nir_imm_int(b, 1),

View File

@ -106,11 +106,11 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
} else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) {
if (chan->normalized) {
return nir_fmul(b,
nir_i2f(b, vpm_reads[swiz]),
nir_i2f32(b, vpm_reads[swiz]),
nir_imm_float(b,
1.0 / 0x7fffffff));
} else {
return nir_i2f(b, vpm_reads[swiz]);
return nir_i2f32(b, vpm_reads[swiz]);
}
} else if (chan->size == 8 &&
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
@ -125,16 +125,16 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
nir_imm_float(b, 1.0));
} else {
return nir_fadd(b,
nir_i2f(b,
vc4_nir_unpack_8i(b, temp,
swiz)),
nir_i2f32(b,
vc4_nir_unpack_8i(b, temp,
swiz)),
nir_imm_float(b, -128.0));
}
} else {
if (chan->normalized) {
return vc4_nir_unpack_8f(b, vpm, swiz);
} else {
return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz));
return nir_i2f32(b, vc4_nir_unpack_8i(b, vpm, swiz));
}
}
} else if (chan->size == 16 &&
@ -146,7 +146,7 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
* UNPACK_16_I for all of these.
*/
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
temp = nir_i2f32(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
if (chan->normalized) {
return nir_fmul(b, temp,
nir_imm_float(b, 1/32768.0f));
@ -154,7 +154,7 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
return temp;
}
} else {
temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
temp = nir_i2f32(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
if (chan->normalized) {
return nir_fmul(b, temp,
nir_imm_float(b, 1 / 65535.0));

View File

@ -1150,12 +1150,12 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
result = qir_FMAX(c, src[0], src[1]);
break;
case nir_op_f2i:
case nir_op_f2u:
case nir_op_f2i32:
case nir_op_f2u32:
result = qir_FTOI(c, src[0]);
break;
case nir_op_i2f:
case nir_op_u2f:
case nir_op_i2f32:
case nir_op_u2f32:
result = qir_ITOF(c, src[0]);
break;
case nir_op_b2f:

View File

@ -97,7 +97,7 @@ blorp_blit_get_frag_coords(nir_builder *b,
const struct brw_blorp_blit_prog_key *key,
struct brw_blorp_blit_vars *v)
{
nir_ssa_def *coord = nir_f2i(b, nir_load_var(b, v->frag_coord));
nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, v->frag_coord));
/* Account for destination surface intratile offset
*
@ -764,7 +764,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
nir_ssa_def *sample_off = nir_imm_vec2(b, sample_off_x, sample_off_y);
nir_ssa_def *sample_coords = nir_fadd(b, pos_xy, sample_off);
nir_ssa_def *sample_coords_int = nir_f2i(b, sample_coords);
nir_ssa_def *sample_coords_int = nir_f2i32(b, sample_coords);
/* The MCS value we fetch has to match up with the pixel that we're
* sampling from. Since we sample from different pixels in each
@ -821,7 +821,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
nir_ssa_def *sample =
nir_fdot2(b, frac, nir_imm_vec2(b, key->x_scale,
key->x_scale * key->y_scale));
sample = nir_f2i(b, sample);
sample = nir_f2i32(b, sample);
if (tex_samples == 8) {
sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573),
@ -1150,7 +1150,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
blorp_nir_discard_if_outside_rect(&b, dst_pos, &v);
}
src_pos = blorp_blit_apply_transform(&b, nir_i2f(&b, dst_pos), &v);
src_pos = blorp_blit_apply_transform(&b, nir_i2f32(&b, dst_pos), &v);
if (dst_pos->num_components == 3) {
/* The sample coordinate is an integer that we want left alone but
* blorp_blit_apply_transform() blindly applies the transform to all
@ -1175,7 +1175,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
/* Resolves (effectively) use texelFetch, so we need integers and we
* don't care about the sample index if we got one.
*/
src_pos = nir_f2i(&b, nir_channels(&b, src_pos, 0x3));
src_pos = nir_f2i32(&b, nir_channels(&b, src_pos, 0x3));
if (devinfo->gen == 6) {
/* Because gen6 only supports 4x interleaved MSAA, we can do all the
@ -1187,7 +1187,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
*/
src_pos = nir_ishl(&b, src_pos, nir_imm_int(&b, 1));
src_pos = nir_iadd(&b, src_pos, nir_imm_int(&b, 1));
src_pos = nir_i2f(&b, src_pos);
src_pos = nir_i2f32(&b, src_pos);
color = blorp_nir_tex(&b, &v, src_pos, key->texture_data_type);
} else {
/* Gen7+ hardware doesn't automatically blend. */
@ -1204,11 +1204,11 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
} else {
/* We're going to use texelFetch, so we need integers */
if (src_pos->num_components == 2) {
src_pos = nir_f2i(&b, src_pos);
src_pos = nir_f2i32(&b, src_pos);
} else {
assert(src_pos->num_components == 3);
src_pos = nir_vec3(&b, nir_channel(&b, nir_f2i(&b, src_pos), 0),
nir_channel(&b, nir_f2i(&b, src_pos), 1),
src_pos = nir_vec3(&b, nir_channel(&b, nir_f2i32(&b, src_pos), 0),
nir_channel(&b, nir_f2i32(&b, src_pos), 1),
nir_channel(&b, src_pos, 2));
}

View File

@ -641,17 +641,17 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
}
switch (instr->op) {
case nir_op_i2f:
case nir_op_u2f:
case nir_op_i2f32:
case nir_op_u2f32:
if (optimize_extract_to_float(instr, result))
return;
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_f2d:
case nir_op_i2d:
case nir_op_u2d:
case nir_op_f2f64:
case nir_op_i2f64:
case nir_op_u2f64:
/* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
*
* "When source or destination is 64b (...), regioning in Align1
@ -676,25 +676,15 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
}
/* fallthrough */
case nir_op_i642d:
case nir_op_u642d:
case nir_op_f2f32:
case nir_op_f2i32:
case nir_op_f2u32:
case nir_op_f2i64:
case nir_op_f2u64:
case nir_op_i2i64:
case nir_op_i2u64:
case nir_op_u2i64:
case nir_op_u2u64:
case nir_op_d2f:
case nir_op_d2i:
case nir_op_d2u:
case nir_op_i642f:
case nir_op_u642f:
case nir_op_u2i32:
case nir_op_i2i32:
case nir_op_i2i64:
case nir_op_u2u32:
case nir_op_i2u32:
case nir_op_f2i:
case nir_op_f2u:
case nir_op_u2u64:
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
@ -1077,7 +1067,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst->saturate = instr->dest.saturate;
break;
case nir_op_b2i64:
case nir_op_b2i:
case nir_op_b2f:
bld.MOV(result, negate(op[0]));
@ -1085,14 +1074,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_i2b:
case nir_op_f2b:
case nir_op_i642b:
case nir_op_d2b:
if (nir_src_bit_size(instr->src[0].src) == 64) {
/* two-argument instructions can't take 64-bit immediates */
fs_reg zero;
fs_reg tmp;
if (instr->op == nir_op_d2b) {
if (instr->op == nir_op_f2b) {
zero = vgrf(glsl_type::double_type);
tmp = vgrf(glsl_type::double_type);
} else {

View File

@ -99,7 +99,7 @@ apply_attr_wa_block(nir_block *block, struct attr_wa_state *state)
nir_imm_vec4(b, 1.0f / ((1 << 9) - 1), 1.0f / ((1 << 9) - 1),
1.0f / ((1 << 9) - 1), 1.0f / ((1 << 1) - 1));
val = nir_fmax(b,
nir_fmul(b, nir_i2f(b, val), es3_normalize_factor),
nir_fmul(b, nir_i2f32(b, val), es3_normalize_factor),
nir_imm_float(b, -1.0f));
} else {
/* The following equations are from the OpenGL 3.2 specification:
@ -121,18 +121,18 @@ apply_attr_wa_block(nir_block *block, struct attr_wa_state *state)
/* For signed normalization, the numerator is 2c+1. */
nir_ssa_def *two = nir_imm_float(b, 2.0f);
nir_ssa_def *one = nir_imm_float(b, 1.0f);
val = nir_fadd(b, nir_fmul(b, nir_i2f(b, val), two), one);
val = nir_fadd(b, nir_fmul(b, nir_i2f32(b, val), two), one);
} else {
/* For unsigned normalization, the numerator is just c. */
val = nir_u2f(b, val);
val = nir_u2f32(b, val);
}
val = nir_fmul(b, val, normalize_factor);
}
}
if (wa_flags & BRW_ATTRIB_WA_SCALE) {
val = (wa_flags & BRW_ATTRIB_WA_SIGN) ? nir_i2f(b, val)
: nir_u2f(b, val);
val = (wa_flags & BRW_ATTRIB_WA_SIGN) ? nir_i2f32(b, val)
: nir_u2f32(b, val);
}
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, nir_src_for_ssa(val),

View File

@ -1287,32 +1287,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_vec4:
unreachable("not reached: should be handled by lower_vec_to_movs()");
case nir_op_i2f:
case nir_op_u2f:
case nir_op_i2f32:
case nir_op_u2f32:
inst = emit(MOV(dst, op[0]));
inst->saturate = instr->dest.saturate;
break;
case nir_op_f2i:
case nir_op_f2u:
inst = emit(MOV(dst, op[0]));
case nir_op_f2f32:
case nir_op_f2i32:
case nir_op_f2u32:
if (nir_src_bit_size(instr->src[0].src) == 64)
emit_conversion_from_double(dst, op[0], instr->dest.saturate);
else
inst = emit(MOV(dst, op[0]));
break;
case nir_op_d2f:
emit_conversion_from_double(dst, op[0], instr->dest.saturate);
break;
case nir_op_f2d:
emit_conversion_to_double(dst, op[0], instr->dest.saturate);
break;
case nir_op_d2i:
case nir_op_d2u:
emit_conversion_from_double(dst, op[0], instr->dest.saturate);
break;
case nir_op_i2d:
case nir_op_u2d:
case nir_op_f2f64:
case nir_op_i2f64:
case nir_op_u2f64:
emit_conversion_to_double(dst, op[0], instr->dest.saturate);
break;
@ -1681,26 +1673,25 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_f2b:
emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
break;
if (nir_src_bit_size(instr->src[0].src) == 64) {
/* We use a MOV with conditional_mod to check if the provided value is
* 0.0. We want this to flush denormalized numbers to zero, so we set a
* source modifier on the source operand to trigger this, as source
* modifiers don't affect the result of the testing against 0.0.
*/
src_reg value = op[0];
value.abs = true;
vec4_instruction *inst = emit(MOV(dst_null_df(), value));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
case nir_op_d2b: {
/* We use a MOV with conditional_mod to check if the provided value is
* 0.0. We want this to flush denormalized numbers to zero, so we set a
* source modifier on the source operand to trigger this, as source
* modifiers don't affect the result of the testing against 0.0.
*/
src_reg value = op[0];
value.abs = true;
vec4_instruction *inst = emit(MOV(dst_null_df(), value));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
src_reg one = src_reg(this, glsl_type::ivec4_type);
emit(MOV(dst_reg(one), brw_imm_d(~0)));
inst = emit(BRW_OPCODE_SEL, dst, one, brw_imm_d(0));
inst->predicate = BRW_PREDICATE_NORMAL;
src_reg one = src_reg(this, glsl_type::ivec4_type);
emit(MOV(dst_reg(one), brw_imm_d(~0)));
inst = emit(BRW_OPCODE_SEL, dst, one, brw_imm_d(0));
inst->predicate = BRW_PREDICATE_NORMAL;
} else {
emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
}
break;
}
case nir_op_i2b:
emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));

View File

@ -57,7 +57,7 @@ try_lower_input_load(nir_function_impl *impl, nir_intrinsic_instr *load)
nir_builder_init(&b, impl);
b.cursor = nir_before_instr(&load->instr);
nir_ssa_def *frag_coord = nir_f2i(&b, load_frag_coord(&b));
nir_ssa_def *frag_coord = nir_f2i32(&b, load_frag_coord(&b));
nir_ssa_def *offset = nir_ssa_for_src(&b, load->src[0], 2);
nir_ssa_def *pos = nir_iadd(&b, frag_coord, offset);

View File

@ -306,7 +306,7 @@ ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
/* ARL: floor src[0], convert the floored float to an integer and hand the
 * result to ptn_move_dest() for writing into dest.
 *
 * NOTE(review): the nir_f2i and nir_f2i32 statements look like the before and
 * after forms of the same line from the opcode rename; confirm only the
 * nir_f2i32 form is meant to remain.
 */
static void
ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
}
/* EXP - Approximate Exponential Base 2