nir: Add b2b opcodes

These exist to convert between different types of boolean values.  In
particular, we want to use these for uniform and shared memory
operations where we need to convert to a reasonably sized boolean, but
we don't care what its format is, so we don't want to make the back-end
insert an actual i2b/b2i.  In the case of uniforms, Mesa can tweak the
format of the uniform boolean to whatever the driver wants.  In the case
of shared memory, every value in a shared variable comes from the shader,
so it's already in the right boolean format.
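
As an illustration (a hedged sketch, not part of this patch), a pass that
wants to store a 1-bit boolean to shared memory as a 32-bit value and read
it back could use the builder helpers generated for the new opcodes; the
helper names assume the usual nir_builder_opcodes.h generation from
nir_opcodes.py:

    #include "nir.h"
    #include "nir_builder.h"

    /* Widen a 1-bit boolean before a shared-memory store.  No i2b/b2i is
     * involved, so the back-end never has to commit to a boolean format. */
    static nir_ssa_def *
    widen_bool_for_storage(nir_builder *b, nir_ssa_def *cond /* 1-bit */)
    {
       return nir_b2b32(b, cond);
    }

    /* Narrow the loaded 32-bit value back to a 1-bit boolean. */
    static nir_ssa_def *
    narrow_loaded_bool(nir_builder *b, nir_ssa_def *loaded /* 32-bit */)
    {
       return nir_b2b1(b, loaded);
    }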

The new boolean conversion opcodes get replaced with mov in
lower_bool_to_int/float32 so the back-end will hopefully never see them.
However, while we're in the middle of optimizing our NIR, they let us
have sensible load_uniform/ubo intrinsics while still expressing the
bit-size conversion.
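
Concretely (a sketch under the assumption of a back-end that consumes
32-bit booleans), a driver's NIR finalization might look like the snippet
below; any b2b still present after the lowering pass would indicate a bug:

    /* nir_opt_algebraic (with the rule added below) folds away no-op
     * b2bN(a@N) conversions; nir_lower_bool_to_int32 rewrites whatever
     * remains into movs. */
    NIR_PASS_V(shader, nir_opt_algebraic);
    NIR_PASS_V(shader, nir_lower_bool_to_int32);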

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4338>
Jason Ekstrand, 2020-03-27 00:18:43 -05:00 (committed by Marge Bot)
parent 2cb9cc56d5, commit b2db84153a
5 changed files with 22 additions and 2 deletions

@@ -167,6 +167,16 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
                bit_size == 16 ? nir_op_i2b16 : nir_op_i2b32;
       break;
+   case nir_op_b2b1:
+      /* Since the canonical bit size is the size of the src, it's a no-op */
+      opcode = nir_op_mov;
+      break;
+   case nir_op_b2b32:
+      /* For up-converting booleans, sign-extend */
+      opcode = nir_op_i2i32;
+      break;
    case nir_op_flt:
       opcode = bit_size == 8 ? nir_op_flt8 :
                bit_size == 16 ? nir_op_flt16 : nir_op_flt32;
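
A brief aside on why sign-extension is the right up-conversion (a
standalone sketch, not Mesa code): NIR encodes true as all-ones at every
boolean bit size, so sign-extending the 1-bit value preserves both true
and false:

    #include <stdint.h>
    #include <stdio.h>

    /* 1-bit true is the bit pattern 1, i.e. -1 as a signed 1-bit value;
     * sign-extension turns it into 0xffffffff, NIR's 32-bit true. */
    static uint32_t sign_extend_1bit(uint32_t b1)
    {
       return (b1 & 1) ? 0xffffffffu : 0u;
    }

    int main(void)
    {
       printf("true  -> 0x%08x\n", sign_extend_1bit(1));  /* 0xffffffff */
       printf("false -> 0x%08x\n", sign_extend_1bit(0));  /* 0x00000000 */
       return 0;
    }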

@@ -68,6 +68,7 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
       rep = nir_sne(b, nir_ssa_for_alu_src(b, alu, 0),
                        nir_imm_float(b, 0));
       break;
+   case nir_op_b2b1: alu->op = nir_op_mov; break;
    case nir_op_flt: alu->op = nir_op_slt; break;
    case nir_op_fge: alu->op = nir_op_sge; break;

@@ -65,6 +65,15 @@ lower_alu_instr(nir_alu_instr *alu)
    case nir_op_f2b1: alu->op = nir_op_f2b32; break;
    case nir_op_i2b1: alu->op = nir_op_i2b32; break;
+   case nir_op_b2b32:
+   case nir_op_b2b1:
+      /* We're mutating instructions in a dominance-preserving order so our
+       * source boolean should be 32-bit by now.
+       */
+      assert(nir_src_bit_size(alu->src[0].src) == 32);
+      alu->op = nir_op_mov;
+      break;
    case nir_op_flt: alu->op = nir_op_flt32; break;
    case nir_op_fge: alu->op = nir_op_fge32; break;
    case nir_op_feq: alu->op = nir_op_feq32; break;

@@ -211,7 +211,7 @@ unop("flog2", tfloat, "log2f(src0)")
 # Generate all of the numeric conversion opcodes
 for src_t in [tint, tuint, tfloat, tbool]:
    if src_t == tbool:
-      dst_types = [tfloat, tint]
+      dst_types = [tfloat, tint, tbool]
    elif src_t == tint:
       dst_types = [tfloat, tint, tbool]
    elif src_t == tuint:
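
Adding tbool to dst_types makes the conversion generator emit a
bool-to-bool opcode for each boolean bit size.  Assuming the usual sizes
(1, 8, 16, 32), a back-end or validation helper could recognize the new
opcodes like this (a hedged sketch; the enum names follow the generator's
naming scheme):

    #include "nir.h"

    /* True for the bool-to-bool conversions added by this change. */
    static bool
    op_is_b2b(nir_op op)
    {
       switch (op) {
       case nir_op_b2b1:
       case nir_op_b2b8:
       case nir_op_b2b16:
       case nir_op_b2b32:
          return true;
       default:
          return false;
       }
    }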

@@ -1359,7 +1359,7 @@ for x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']):
    optimizations.append(((x2yN, (b2x, a)), (b2y, a)))
 # Optimize away x2xN(a@N)
-for t in ['int', 'uint', 'float']:
+for t in ['int', 'uint', 'float', 'bool']:
    for N in type_sizes(t):
       x2xN = '{0}2{0}{1}'.format(t[0], N)
       aN = 'a@{0}'.format(N)