nir: Switch to using 1-bit Booleans for almost everything
This is a squash of a few distinct changes:

- glsl,spirv: Generate 1-bit Booleans
- Revert "Use 32-bit opcodes in the NIR producers and optimizations"
- Revert "nir/builder: Generate 32-bit bool opcodes transparently"
- nir/builder: Generate 1-bit Booleans in nir_build_imm_bool

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
parent
11dc130779
commit
44227453ec
|
@ -261,7 +261,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
|
|||
assert(cols == 1);
|
||||
|
||||
for (unsigned r = 0; r < rows; r++)
|
||||
ret->values[0].u32[r] = ir->value.b[r] ? NIR_TRUE : NIR_FALSE;
|
||||
ret->values[0].b[r] = ir->value.b[r];
|
||||
|
||||
break;
|
||||
|
||||
|
@ -1000,7 +1000,8 @@ nir_visitor::visit(ir_call *ir)
|
|||
assert(write_mask);
|
||||
|
||||
nir_ssa_def *nir_val = evaluate_rvalue(val);
|
||||
assert(!val->type->is_boolean() || nir_val->bit_size == 32);
|
||||
if (val->type->is_boolean())
|
||||
nir_val = nir_b2i32(&b, nir_val);
|
||||
|
||||
instr->src[0] = nir_src_for_ssa(nir_val);
|
||||
instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
|
||||
|
@ -1110,6 +1111,10 @@ nir_visitor::visit(ir_call *ir)
|
|||
type->vector_elements, bit_size, NULL);
|
||||
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
|
||||
/* The value in shared memory is a 32-bit value */
|
||||
if (type->is_boolean())
|
||||
ret = nir_i2b(&b, &instr->dest.ssa);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_store_shared: {
|
||||
|
@ -1129,7 +1134,9 @@ nir_visitor::visit(ir_call *ir)
|
|||
nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
|
||||
|
||||
nir_ssa_def *nir_val = evaluate_rvalue(val);
|
||||
assert(!val->type->is_boolean() || nir_val->bit_size == 32);
|
||||
/* The value in shared memory is a 32-bit value */
|
||||
if (val->type->is_boolean())
|
||||
nir_val = nir_b2i32(&b, nir_val);
|
||||
|
||||
instr->src[0] = nir_src_for_ssa(nir_val);
|
||||
instr->num_components = val->type->vector_elements;
|
||||
|
@ -1187,7 +1194,7 @@ nir_visitor::visit(ir_call *ir)
|
|||
case nir_intrinsic_vote_any:
|
||||
case nir_intrinsic_vote_all:
|
||||
case nir_intrinsic_vote_ieq: {
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);
|
||||
instr->num_components = 1;
|
||||
|
||||
ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
|
||||
|
|
|
@ -828,7 +828,7 @@ nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
|
|||
{
|
||||
switch (base_type) {
|
||||
case GLSL_TYPE_BOOL:
|
||||
return nir_type_bool32;
|
||||
return nir_type_bool1;
|
||||
break;
|
||||
case GLSL_TYPE_UINT:
|
||||
return nir_type_uint32;
|
||||
|
@ -1568,18 +1568,18 @@ static inline bool
|
|||
nir_alu_instr_is_comparison(const nir_alu_instr *instr)
|
||||
{
|
||||
switch (instr->op) {
|
||||
case nir_op_flt32:
|
||||
case nir_op_fge32:
|
||||
case nir_op_feq32:
|
||||
case nir_op_fne32:
|
||||
case nir_op_ilt32:
|
||||
case nir_op_ult32:
|
||||
case nir_op_ige32:
|
||||
case nir_op_uge32:
|
||||
case nir_op_ieq32:
|
||||
case nir_op_ine32:
|
||||
case nir_op_i2b32:
|
||||
case nir_op_f2b32:
|
||||
case nir_op_flt:
|
||||
case nir_op_fge:
|
||||
case nir_op_feq:
|
||||
case nir_op_fne:
|
||||
case nir_op_ilt:
|
||||
case nir_op_ult:
|
||||
case nir_op_ige:
|
||||
case nir_op_uge:
|
||||
case nir_op_ieq:
|
||||
case nir_op_ine:
|
||||
case nir_op_i2b1:
|
||||
case nir_op_f2b1:
|
||||
case nir_op_inot:
|
||||
case nir_op_fnot:
|
||||
return true;
|
||||
|
|
|
@ -212,9 +212,9 @@ nir_imm_bool(nir_builder *build, bool x)
|
|||
nir_const_value v;
|
||||
|
||||
memset(&v, 0, sizeof(v));
|
||||
v.u32[0] = x ? NIR_TRUE : NIR_FALSE;
|
||||
v.b[0] = x;
|
||||
|
||||
return nir_build_imm(build, 1, 32, v);
|
||||
return nir_build_imm(build, 1, 1, v);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
|
@ -976,13 +976,13 @@ nir_load_param(nir_builder *build, uint32_t param_idx)
|
|||
static inline nir_ssa_def *
|
||||
nir_f2b(nir_builder *build, nir_ssa_def *f)
|
||||
{
|
||||
return nir_f2b32(build, f);
|
||||
return nir_f2b1(build, f);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_i2b(nir_builder *build, nir_ssa_def *i)
|
||||
{
|
||||
return nir_i2b32(build, i);
|
||||
return nir_i2b1(build, i);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
|
|
|
@ -27,36 +27,6 @@ template = """\
|
|||
#define _NIR_BUILDER_OPCODES_
|
||||
|
||||
<%
|
||||
opcode_remap = {
|
||||
'flt' : 'flt32',
|
||||
'fge' : 'fge32',
|
||||
'feq' : 'feq32',
|
||||
'fne' : 'fne32',
|
||||
'ilt' : 'ilt32',
|
||||
'ige' : 'ige32',
|
||||
'ieq' : 'ieq32',
|
||||
'ine' : 'ine32',
|
||||
'ult' : 'ult32',
|
||||
'uge' : 'uge32',
|
||||
|
||||
'ball_iequal2' : 'b32all_iequal2',
|
||||
'ball_iequal3' : 'b32all_iequal3',
|
||||
'ball_iequal4' : 'b32all_iequal4',
|
||||
'bany_inequal2' : 'b32any_inequal2',
|
||||
'bany_inequal3' : 'b32any_inequal3',
|
||||
'bany_inequal4' : 'b32any_inequal4',
|
||||
'ball_fequal2' : 'b32all_fequal2',
|
||||
'ball_fequal3' : 'b32all_fequal3',
|
||||
'ball_fequal4' : 'b32all_fequal4',
|
||||
'bany_fnequal2' : 'b32any_fnequal2',
|
||||
'bany_fnequal3' : 'b32any_fnequal3',
|
||||
'bany_fnequal4' : 'b32any_fnequal4',
|
||||
|
||||
'bcsel' : 'b32csel',
|
||||
}
|
||||
|
||||
opcode_remap32 = { op32 : op for op, op32 in opcode_remap.items() }
|
||||
|
||||
def src_decl_list(num_srcs):
|
||||
return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs))
|
||||
|
||||
|
@ -65,15 +35,8 @@ def src_list(num_srcs):
|
|||
%>
|
||||
|
||||
% for name, opcode in sorted(opcodes.items()):
|
||||
% if name in opcode_remap:
|
||||
<% continue %>
|
||||
% elif name in opcode_remap32:
|
||||
<% builder_name = opcode_remap32[name] %>
|
||||
% else:
|
||||
<% builder_name = name %>
|
||||
% endif
|
||||
static inline nir_ssa_def *
|
||||
nir_${builder_name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
|
||||
nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
|
||||
{
|
||||
return nir_build_alu(build, nir_op_${name}, ${src_list(opcode.num_inputs)});
|
||||
}
|
||||
|
|
|
@ -433,26 +433,26 @@ get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
|
|||
int32_t iter;
|
||||
|
||||
switch (cond_op) {
|
||||
case nir_op_ige32:
|
||||
case nir_op_ilt32:
|
||||
case nir_op_ieq32:
|
||||
case nir_op_ine32: {
|
||||
case nir_op_ige:
|
||||
case nir_op_ilt:
|
||||
case nir_op_ieq:
|
||||
case nir_op_ine: {
|
||||
int32_t initial_val = initial->i32[0];
|
||||
int32_t span = limit->i32[0] - initial_val;
|
||||
iter = span / step->i32[0];
|
||||
break;
|
||||
}
|
||||
case nir_op_uge32:
|
||||
case nir_op_ult32: {
|
||||
case nir_op_uge:
|
||||
case nir_op_ult: {
|
||||
uint32_t initial_val = initial->u32[0];
|
||||
uint32_t span = limit->u32[0] - initial_val;
|
||||
iter = span / step->u32[0];
|
||||
break;
|
||||
}
|
||||
case nir_op_fge32:
|
||||
case nir_op_flt32:
|
||||
case nir_op_feq32:
|
||||
case nir_op_fne32: {
|
||||
case nir_op_fge:
|
||||
case nir_op_flt:
|
||||
case nir_op_feq:
|
||||
case nir_op_fne: {
|
||||
float initial_val = initial->f32[0];
|
||||
float span = limit->f32[0] - initial_val;
|
||||
iter = span / step->f32[0];
|
||||
|
@ -623,10 +623,10 @@ find_trip_count(loop_info_state *state)
|
|||
bool limit_rhs = true;
|
||||
|
||||
switch (alu->op) {
|
||||
case nir_op_fge32: case nir_op_ige32: case nir_op_uge32:
|
||||
case nir_op_flt32: case nir_op_ilt32: case nir_op_ult32:
|
||||
case nir_op_feq32: case nir_op_ieq32:
|
||||
case nir_op_fne32: case nir_op_ine32:
|
||||
case nir_op_fge: case nir_op_ige: case nir_op_uge:
|
||||
case nir_op_flt: case nir_op_ilt: case nir_op_ult:
|
||||
case nir_op_feq: case nir_op_ieq:
|
||||
case nir_op_fne: case nir_op_ine:
|
||||
|
||||
/* We assume that the limit is the "right" operand */
|
||||
basic_ind = get_loop_var(alu->src[0].src.ssa, state);
|
||||
|
|
|
@ -158,7 +158,11 @@ nir_lower_two_sided_color_block(nir_block *block,
|
|||
* bcsel(load_system_value(FACE), load_input(COLn), load_input(BFCn))
|
||||
*/
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_ssa_def *face = nir_load_front_face(b);
|
||||
nir_ssa_def *face = nir_load_front_face(b);
|
||||
/* gl_FrontFace is a boolean but the intrinsic constructor creates
|
||||
* 32-bit value by default.
|
||||
*/
|
||||
face->bit_size = 1;
|
||||
nir_ssa_def *front = load_input(b, state->colors[idx].front);
|
||||
nir_ssa_def *back = load_input(b, state->colors[idx].back);
|
||||
nir_ssa_def *color = nir_bcsel(b, face, front, back);
|
||||
|
|
|
@ -609,7 +609,7 @@ can_propagate_through_alu(nir_src *src)
|
|||
case nir_op_inot:
|
||||
case nir_op_b2i32:
|
||||
return true;
|
||||
case nir_op_b32csel:
|
||||
case nir_op_bcsel:
|
||||
return src == &alu->src[0].src;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -205,7 +205,7 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
|
|||
break;
|
||||
|
||||
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
||||
nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_b32csel);
|
||||
nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
|
||||
nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
|
||||
/* Splat the condition to all channels */
|
||||
memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
static bool
|
||||
opt_undef_csel(nir_alu_instr *instr)
|
||||
{
|
||||
if (instr->op != nir_op_b32csel && instr->op != nir_op_fcsel)
|
||||
if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel)
|
||||
return false;
|
||||
|
||||
assert(instr->dest.dest.is_ssa);
|
||||
|
|
|
@ -298,6 +298,16 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
|
|||
unsigned i, j;
|
||||
|
||||
switch (glsl_get_base_type(type)) {
|
||||
case GLSL_TYPE_BOOL:
|
||||
/* Only float base types can be matrices. */
|
||||
assert(cols == 1);
|
||||
|
||||
for (i = 0; i < rows; i++) {
|
||||
if (i > 0) fprintf(fp, ", ");
|
||||
fprintf(fp, "%s", c->values[0].b[i] ? "true" : "false");
|
||||
}
|
||||
break;
|
||||
|
||||
case GLSL_TYPE_UINT8:
|
||||
case GLSL_TYPE_INT8:
|
||||
/* Only float base types can be matrices. */
|
||||
|
@ -322,7 +332,6 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
|
|||
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_BOOL:
|
||||
/* Only float base types can be matrices. */
|
||||
assert(cols == 1);
|
||||
|
||||
|
|
|
@ -95,9 +95,11 @@ static inline unsigned
|
|||
glsl_get_bit_size(const struct glsl_type *type)
|
||||
{
|
||||
switch (glsl_get_base_type(type)) {
|
||||
case GLSL_TYPE_BOOL:
|
||||
return 1;
|
||||
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_BOOL:
|
||||
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
return 32;
|
||||
|
|
|
@ -1468,7 +1468,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
|
|||
opcode == SpvOpSpecConstantFalse)
|
||||
int_val = get_specialization(b, val, int_val);
|
||||
|
||||
val->constant->values[0].u32[0] = int_val ? NIR_TRUE : NIR_FALSE;
|
||||
val->constant->values[0].b[0] = int_val != 0;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -244,15 +244,15 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
|
|||
case SpvOpShiftRightArithmetic: return nir_op_ishr;
|
||||
case SpvOpShiftLeftLogical: return nir_op_ishl;
|
||||
case SpvOpLogicalOr: return nir_op_ior;
|
||||
case SpvOpLogicalEqual: return nir_op_ieq32;
|
||||
case SpvOpLogicalNotEqual: return nir_op_ine32;
|
||||
case SpvOpLogicalEqual: return nir_op_ieq;
|
||||
case SpvOpLogicalNotEqual: return nir_op_ine;
|
||||
case SpvOpLogicalAnd: return nir_op_iand;
|
||||
case SpvOpLogicalNot: return nir_op_inot;
|
||||
case SpvOpBitwiseOr: return nir_op_ior;
|
||||
case SpvOpBitwiseXor: return nir_op_ixor;
|
||||
case SpvOpBitwiseAnd: return nir_op_iand;
|
||||
case SpvOpSelect: return nir_op_b32csel;
|
||||
case SpvOpIEqual: return nir_op_ieq32;
|
||||
case SpvOpSelect: return nir_op_bcsel;
|
||||
case SpvOpIEqual: return nir_op_ieq;
|
||||
|
||||
case SpvOpBitFieldInsert: return nir_op_bitfield_insert;
|
||||
case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract;
|
||||
|
@ -264,27 +264,27 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
|
|||
* the logical operator to use since they also need to check if operands are
|
||||
* ordered.
|
||||
*/
|
||||
case SpvOpFOrdEqual: return nir_op_feq32;
|
||||
case SpvOpFUnordEqual: return nir_op_feq32;
|
||||
case SpvOpINotEqual: return nir_op_ine32;
|
||||
case SpvOpFOrdNotEqual: return nir_op_fne32;
|
||||
case SpvOpFUnordNotEqual: return nir_op_fne32;
|
||||
case SpvOpULessThan: return nir_op_ult32;
|
||||
case SpvOpSLessThan: return nir_op_ilt32;
|
||||
case SpvOpFOrdLessThan: return nir_op_flt32;
|
||||
case SpvOpFUnordLessThan: return nir_op_flt32;
|
||||
case SpvOpUGreaterThan: *swap = true; return nir_op_ult32;
|
||||
case SpvOpSGreaterThan: *swap = true; return nir_op_ilt32;
|
||||
case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt32;
|
||||
case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt32;
|
||||
case SpvOpULessThanEqual: *swap = true; return nir_op_uge32;
|
||||
case SpvOpSLessThanEqual: *swap = true; return nir_op_ige32;
|
||||
case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge32;
|
||||
case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge32;
|
||||
case SpvOpUGreaterThanEqual: return nir_op_uge32;
|
||||
case SpvOpSGreaterThanEqual: return nir_op_ige32;
|
||||
case SpvOpFOrdGreaterThanEqual: return nir_op_fge32;
|
||||
case SpvOpFUnordGreaterThanEqual: return nir_op_fge32;
|
||||
case SpvOpFOrdEqual: return nir_op_feq;
|
||||
case SpvOpFUnordEqual: return nir_op_feq;
|
||||
case SpvOpINotEqual: return nir_op_ine;
|
||||
case SpvOpFOrdNotEqual: return nir_op_fne;
|
||||
case SpvOpFUnordNotEqual: return nir_op_fne;
|
||||
case SpvOpULessThan: return nir_op_ult;
|
||||
case SpvOpSLessThan: return nir_op_ilt;
|
||||
case SpvOpFOrdLessThan: return nir_op_flt;
|
||||
case SpvOpFUnordLessThan: return nir_op_flt;
|
||||
case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
|
||||
case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
|
||||
case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt;
|
||||
case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt;
|
||||
case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
|
||||
case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
|
||||
case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge;
|
||||
case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge;
|
||||
case SpvOpUGreaterThanEqual: return nir_op_uge;
|
||||
case SpvOpSGreaterThanEqual: return nir_op_ige;
|
||||
case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
|
||||
case SpvOpFUnordGreaterThanEqual: return nir_op_fge;
|
||||
|
||||
/* Conversions: */
|
||||
case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
|
||||
|
@ -413,9 +413,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
} else {
|
||||
nir_op op;
|
||||
switch (src[0]->num_components) {
|
||||
case 2: op = nir_op_b32any_inequal2; break;
|
||||
case 3: op = nir_op_b32any_inequal3; break;
|
||||
case 4: op = nir_op_b32any_inequal4; break;
|
||||
case 2: op = nir_op_bany_inequal2; break;
|
||||
case 3: op = nir_op_bany_inequal3; break;
|
||||
case 4: op = nir_op_bany_inequal4; break;
|
||||
default: vtn_fail("invalid number of components");
|
||||
}
|
||||
val->ssa->def = nir_build_alu(&b->nb, op, src[0],
|
||||
|
@ -430,9 +430,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
} else {
|
||||
nir_op op;
|
||||
switch (src[0]->num_components) {
|
||||
case 2: op = nir_op_b32all_iequal2; break;
|
||||
case 3: op = nir_op_b32all_iequal3; break;
|
||||
case 4: op = nir_op_b32all_iequal4; break;
|
||||
case 2: op = nir_op_ball_iequal2; break;
|
||||
case 3: op = nir_op_ball_iequal3; break;
|
||||
case 4: op = nir_op_ball_iequal4; break;
|
||||
default: vtn_fail("invalid number of components");
|
||||
}
|
||||
val->ssa->def = nir_build_alu(&b->nb, op, src[0],
|
||||
|
|
Loading…
Reference in New Issue