pan/bi: Adapt builder to dest count

If there are no destinations, don't produce a _to version, and let the
bare version return the bi_instr.

If there are multiple destinations, take each destination in the _to
version and don't produce a bare version.

Both cases are probably what you wanted anyway.
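
For illustration, here is the effect at two call sites from the diff
below (JUMP has zero destinations, CUBEFACE has two):

    /* Before: every op had a _to variant taking exactly one dest,
     * and extra dests were patched in by hand. */
    bi_instr *branch = bi_jump_to(b, bi_null(), bi_zero());
    bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader), coord,
            bi_word(coord, 1), bi_word(coord, 2));
    cubeface->dest[1] = bi_temp(b->shader);

    /* After: the zero-dest op gets only a bare version returning the
     * bi_instr, and the two-dest op takes both dests in its _to version. */
    bi_instr *branch = bi_jump(b, bi_zero());
    bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader),
            bi_temp(b->shader), coord,
            bi_word(coord, 1), bi_word(coord, 2));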

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9164>
Alyssa Rosenzweig authored on 2021-02-18 14:58:50 -05:00; committed by Marge Bot
parent ac3722fd83
commit 4f71801aa2
5 changed files with 43 additions and 51 deletions


@@ -40,15 +40,21 @@ def typesize(opcode):
        return int(opcode[-2:])
    except:
        return None
+
+def to_suffix(op):
+    return "_to" if op["dests"] > 0 else ""
%>
% for opcode in ops:
static inline
-bi_instr * bi_${opcode.replace('.', '_').lower()}_to(${signature(ops[opcode], 1, modifiers)})
+bi_instr * bi_${opcode.replace('.', '_').lower()}${to_suffix(ops[opcode])}(${signature(ops[opcode], modifiers)})
{
bi_instr *I = rzalloc(b->shader, bi_instr);
I->op = BI_OPCODE_${opcode.replace('.', '_').upper()};
-I->dest[0] = dest0;
+% for dest in range(ops[opcode]["dests"]):
+I->dest[${dest}] = dest${dest};
+% endfor
% for src in range(src_count(ops[opcode])):
I->src[${src}] = src${src};
% endfor
@@ -64,31 +70,31 @@ bi_instr * bi_${opcode.replace('.', '_').lower()}_to(${signature(ops[opcode], 1,
return I;
}
-% if opcode.split(".")[0] not in ["JUMP", "BRANCHZ", "BRANCH"]:
+% if ops[opcode]["dests"] == 1:
static inline
-bi_index bi_${opcode.replace('.', '_').lower()}(${signature(ops[opcode], 0, modifiers)})
+bi_index bi_${opcode.replace('.', '_').lower()}(${signature(ops[opcode], modifiers, no_dests=True)})
{
-return (bi_${opcode.replace('.', '_').lower()}_to(${arguments(ops[opcode], 1)}))->dest[0];
+return (bi_${opcode.replace('.', '_').lower()}_to(${arguments(ops[opcode])}))->dest[0];
}
%endif
<%
common_op = opcode.split('.')[0]
variants = [a for a in ops.keys() if a.split('.')[0] == common_op]
-signatures = [signature(ops[op], 0, modifiers, sized=True) for op in variants]
+signatures = [signature(ops[op], modifiers, sized=True, no_dests=True) for op in variants]
homogenous = all([sig == signatures[0] for sig in signatures])
sizes = [typesize(x) for x in variants]
last = opcode == variants[-1]
%>
% if homogenous and len(variants) > 1 and last:
% for (suffix, temp, dests, ret) in (('_to', False, 1, 'instr *'), ('', True, 0, 'index')):
-% if not temp or common_op not in ["JUMP", "BRANCHZ", "BRANCH"]:
+% if not temp or ops[opcode]["dests"] > 0:
static inline
-bi_${ret} bi_${common_op.replace('.', '_').lower()}${suffix}(${signature(ops[opcode], dests, modifiers, sized=True)})
+bi_${ret} bi_${common_op.replace('.', '_').lower()}${suffix if ops[opcode]['dests'] > 0 else ''}(${signature(ops[opcode], modifiers, sized=True, no_dests=not dests)})
{
% for i, (variant, size) in enumerate(zip(variants, sizes)):
${"else " if i > 0 else ""} if (bitsize == ${size})
-return (bi_${variant.replace('.', '_').lower()}_to(${arguments(ops[opcode], 1, temp_dest = temp)}))${"->dest[0]" if temp else ""};
+return (bi_${variant.replace('.', '_').lower()}${to_suffix(ops[opcode])}(${arguments(ops[opcode], temp_dest = temp)}))${"->dest[0]" if temp else ""};
% endfor
else
unreachable("Invalid bitsize for ${common_op}");
@@ -116,11 +122,11 @@ def should_skip(mod):
def modifier_signature(op):
return sorted([m for m in op["modifiers"].keys() if not should_skip(m)])
-def signature(op, dest_count, modifiers, sized = False):
+def signature(op, modifiers, sized = False, no_dests = False):
return ", ".join(
["bi_builder *b"] +
(["unsigned bitsize"] if sized else []) +
["bi_index dest{}".format(i) for i in range(dest_count)] +
["bi_index dest{}".format(i) for i in range(0 if no_dests else op["dests"])] +
["bi_index src{}".format(i) for i in range(src_count(op))] +
["{} {}".format(
"bool" if len(modifiers[T[0:-1]] if T[-1] in "0123" else modifiers[T]) == 2 else
@@ -129,10 +135,10 @@ def signature(op, dest_count, modifiers, sized = False):
T) for T in modifier_signature(op)] +
["uint32_t {}".format(imm) for imm in op["immediates"]])
-def arguments(op, dest_count, temp_dest = True):
+def arguments(op, temp_dest = True):
return ", ".join(
["b"] +
["bi_temp(b->shader)" if temp_dest else 'dest{}'.format(i) for i in range(dest_count)] +
["bi_temp(b->shader)" if temp_dest else 'dest{}'.format(i) for i in range(op["dests"])] +
["src{}".format(i) for i in range(src_count(op))] +
modifier_signature(op) +
op["immediates"])


@@ -256,7 +256,7 @@ bi_spill_dest(bi_builder *b, bi_index index, bi_index temp, uint32_t offset,
b->cursor = bi_after_clause(clause);
/* setup FAU as [offset][0] */
-bi_instr *st = bi_store_to(b, channels * 32, bi_null(), temp,
+bi_instr *st = bi_store(b, channels * 32, temp,
bi_passthrough(BIFROST_SRC_FAU_LO),
bi_passthrough(BIFROST_SRC_FAU_HI),
BI_SEG_TL);


@@ -182,7 +182,7 @@ bi_lower_atom_c(bi_context *ctx, struct bi_clause_state *clause, struct
{
bi_instr *pinstr = tuple->add;
bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr));
-bi_instr *atom_c = bi_atom_c_return_i32_to(&b, bi_null(),
+bi_instr *atom_c = bi_atom_c_return_i32(&b,
pinstr->src[1], pinstr->src[2], pinstr->src[0],
pinstr->atom_opc);
@@ -201,7 +201,7 @@ bi_lower_atom_c1(bi_context *ctx, struct bi_clause_state *clause, struct
{
bi_instr *pinstr = tuple->add;
bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr));
-bi_instr *atom_c = bi_atom_c1_return_i32_to(&b, bi_null(),
+bi_instr *atom_c = bi_atom_c1_return_i32(&b,
pinstr->src[0], pinstr->src[1], pinstr->atom_opc);
if (bi_is_null(pinstr->dest[0]))


@@ -65,7 +65,7 @@ static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
static void
bi_emit_jump(bi_builder *b, nir_jump_instr *instr)
{
-bi_instr *branch = bi_jump_to(b, bi_null(), bi_zero());
+bi_instr *branch = bi_jump(b, bi_zero());
switch (instr->type) {
case nir_jump_break:
@@ -528,7 +528,7 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
/* Jump back to the fragment shader, return address is stored
* in r48 (see above).
*/
-bi_jump_to(b, bi_null(), bi_register(48));
+bi_jump(b, bi_register(48));
}
}
@@ -568,8 +568,8 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
unsigned nr = util_last_bit(nir_intrinsic_write_mask(instr));
assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0));
-bi_st_cvt_to(b, bi_null(), bi_src_index(&instr->src[0]),
-address, bi_word(address, 1), bi_word(address, 2),
+bi_st_cvt(b, bi_src_index(&instr->src[0]), address,
+bi_word(address, 1), bi_word(address, 2),
regfmt, nr - 1);
}
@@ -609,8 +609,7 @@ bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
static void
bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
{
-bi_store_to(b, instr->num_components * nir_src_bit_size(instr->src[0]),
-bi_null(),
+bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]),
bi_src_index(&instr->src[0]),
bi_src_index(&instr->src[1]), bi_addr_high(&instr->src[1]),
seg);
@@ -831,8 +830,7 @@ bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr)
{
bi_index addr = bi_emit_lea_image(b, instr);
-bi_st_cvt_to(b, bi_null(),
-bi_src_index(&instr->src[3]),
+bi_st_cvt(b, bi_src_index(&instr->src[3]),
addr, bi_word(addr, 1), bi_word(addr, 2),
bi_reg_fmt_for_nir(nir_intrinsic_src_type(instr)),
instr->num_components - 1);
@@ -1014,7 +1012,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_control_barrier:
assert(b->shader->stage != MESA_SHADER_FRAGMENT);
-bi_barrier_to(b, bi_null());
+bi_barrier(b);
break;
case nir_intrinsic_shared_atomic_add:
@@ -1122,13 +1120,12 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
if (sz == 16)
src = bi_half(src, false);
-bi_discard_f32_to(b, bi_null(), src, bi_zero(), BI_CMPF_NE);
+bi_discard_f32(b, src, bi_zero(), BI_CMPF_NE);
break;
}
case nir_intrinsic_discard:
-bi_discard_f32_to(b, bi_null(), bi_zero(), bi_zero(),
-BI_CMPF_EQ);
+bi_discard_f32(b, bi_zero(), bi_zero(), BI_CMPF_EQ);
break;
case nir_intrinsic_load_ssbo_address:
@@ -2071,9 +2068,9 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord,
bi_index *face, bi_index *s, bi_index *t)
{
/* Compute max { |x|, |y|, |z| } */
-bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader), coord,
+bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader),
+bi_temp(b->shader), coord,
bi_word(coord, 1), bi_word(coord, 2));
-cubeface->dest[1] = bi_temp(b->shader);
/* Select coordinates */
@@ -2553,24 +2550,6 @@ emit_block(bi_context *ctx, nir_block *block)
return ctx->current_block;
}
-/* Emits a direct branch based on a given condition. TODO: try to unwrap the
- * condition to optimize */
-static bi_instr *
-bi_branch(bi_builder *b, nir_src *condition, bool invert)
-{
-return bi_branchz_i32_to(b, bi_null(), bi_src_index(condition),
-bi_zero(), invert ? BI_CMPF_EQ : BI_CMPF_NE);
-}
-static bi_instr *
-bi_jump(bi_builder *b, bi_block *target)
-{
-bi_instr *I = bi_jump_to(b, bi_null(), bi_zero());
-I->branch_target = target;
-return I;
-}
static void
emit_if(bi_context *ctx, nir_if *nif)
{
@@ -2578,7 +2557,8 @@ emit_if(bi_context *ctx, nir_if *nif)
/* Speculatively emit the branch, but we can't fill it in until later */
bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block));
-bi_instr *then_branch = bi_branch(&_b, &nif->condition, true);
+bi_instr *then_branch = bi_branchz_i32(&_b,
+bi_src_index(&nif->condition), bi_zero(), BI_CMPF_EQ);
/* Emit the two subblocks. */
bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
@@ -2604,7 +2584,8 @@ emit_if(bi_context *ctx, nir_if *nif)
/* Emit a jump from the end of the then block to the end of the else */
_b.cursor = bi_after_block(end_then_block);
-bi_instr *then_exit = bi_jump(&_b, ctx->after_block);
+bi_instr *then_exit = bi_jump(&_b, bi_zero());
+then_exit->branch_target = ctx->after_block;
pan_block_add_successor(&end_then_block->base, &then_exit->branch_target->base);
pan_block_add_successor(&end_else_block->base, &ctx->after_block->base); /* fallthrough */
@@ -2632,7 +2613,8 @@ emit_loop(bi_context *ctx, nir_loop *nloop)
/* Branch back to loop back */
bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block));
-bi_jump(&_b, ctx->continue_block);
+bi_instr *I = bi_jump(&_b, bi_zero());
+I->branch_target = ctx->continue_block;
pan_block_add_successor(&start_block->base, &ctx->continue_block->base);
pan_block_add_successor(&ctx->current_block->base, &ctx->continue_block->base);


@@ -131,6 +131,7 @@ def parse_instruction(ins, include_pseudo):
'derived': [],
'staging': ins.attrib.get('staging', '').split('=')[0],
'staging_count': ins.attrib.get('staging', '=0').split('=')[1],
+'dests': int(ins.attrib.get('dests', '1')),
'unused': ins.attrib.get('unused', False),
'pseudo': ins.attrib.get('pseudo', False),
'message': ins.attrib.get('message', 'none'),
@@ -238,6 +239,7 @@ def simplify_to_ir(ins):
return {
'staging': ins['staging'],
'srcs': len(ins['srcs']),
+'dests': ins['dests'],
'modifiers': [[m[0][0], m[2]] for m in ins['modifiers']],
'immediates': [m[0] for m in ins['immediates']]
}
@@ -252,6 +254,7 @@ def combine_ir_variants(instructions, v):
for s in variants:
# Check consistency
assert(s['srcs'] == variants[0]['srcs'])
+assert(s['dests'] == variants[0]['dests'])
assert(s['immediates'] == variants[0]['immediates'])
assert(s['staging'] == variants[0]['staging'])
@@ -265,6 +268,7 @@ def combine_ir_variants(instructions, v):
# modifiers
return {
'srcs': variants[0]['srcs'],
+'dests': variants[0]['dests'],
'staging': variants[0]['staging'],
'immediates': sorted(variants[0]['immediates']),
'modifiers': { k: modifiers[k] for k in modifiers }