diff --git a/src/panfrost/bifrost/bi_builder.h.py b/src/panfrost/bifrost/bi_builder.h.py index 5fbb48f180a..4791a1a30aa 100644 --- a/src/panfrost/bifrost/bi_builder.h.py +++ b/src/panfrost/bifrost/bi_builder.h.py @@ -40,15 +40,21 @@ def typesize(opcode): return int(opcode[-2:]) except: return None + +def to_suffix(op): + return "_to" if op["dests"] > 0 else "" + %> % for opcode in ops: static inline -bi_instr * bi_${opcode.replace('.', '_').lower()}_to(${signature(ops[opcode], 1, modifiers)}) +bi_instr * bi_${opcode.replace('.', '_').lower()}${to_suffix(ops[opcode])}(${signature(ops[opcode], modifiers)}) { bi_instr *I = rzalloc(b->shader, bi_instr); I->op = BI_OPCODE_${opcode.replace('.', '_').upper()}; - I->dest[0] = dest0; +% for dest in range(ops[opcode]["dests"]): + I->dest[${dest}] = dest${dest}; +% endfor % for src in range(src_count(ops[opcode])): I->src[${src}] = src${src}; % endfor @@ -64,31 +70,31 @@ bi_instr * bi_${opcode.replace('.', '_').lower()}_to(${signature(ops[opcode], 1, return I; } -% if opcode.split(".")[0] not in ["JUMP", "BRANCHZ", "BRANCH"]: +% if ops[opcode]["dests"] == 1: static inline -bi_index bi_${opcode.replace('.', '_').lower()}(${signature(ops[opcode], 0, modifiers)}) +bi_index bi_${opcode.replace('.', '_').lower()}(${signature(ops[opcode], modifiers, no_dests=True)}) { - return (bi_${opcode.replace('.', '_').lower()}_to(${arguments(ops[opcode], 1)}))->dest[0]; + return (bi_${opcode.replace('.', '_').lower()}_to(${arguments(ops[opcode])}))->dest[0]; } %endif <% common_op = opcode.split('.')[0] variants = [a for a in ops.keys() if a.split('.')[0] == common_op] - signatures = [signature(ops[op], 0, modifiers, sized=True) for op in variants] + signatures = [signature(ops[op], modifiers, sized=True, no_dests=True) for op in variants] homogenous = all([sig == signatures[0] for sig in signatures]) sizes = [typesize(x) for x in variants] last = opcode == variants[-1] %> % if homogenous and len(variants) > 1 and last: % for (suffix, temp, dests, ret) in (('_to', False, 1, 'instr *'), ('', True, 0, 'index')): -% if not temp or common_op not in ["JUMP", "BRANCHZ", "BRANCH"]: +% if not temp or ops[opcode]["dests"] > 0: static inline -bi_${ret} bi_${common_op.replace('.', '_').lower()}${suffix}(${signature(ops[opcode], dests, modifiers, sized=True)}) +bi_${ret} bi_${common_op.replace('.', '_').lower()}${suffix if ops[opcode]['dests'] > 0 else ''}(${signature(ops[opcode], modifiers, sized=True, no_dests=not dests)}) { % for i, (variant, size) in enumerate(zip(variants, sizes)): ${"else " if i > 0 else ""} if (bitsize == ${size}) - return (bi_${variant.replace('.', '_').lower()}_to(${arguments(ops[opcode], 1, temp_dest = temp)}))${"->dest[0]" if temp else ""}; + return (bi_${variant.replace('.', '_').lower()}${to_suffix(ops[opcode])}(${arguments(ops[opcode], temp_dest = temp)}))${"->dest[0]" if temp else ""}; % endfor else unreachable("Invalid bitsize for ${common_op}"); @@ -116,11 +122,11 @@ def should_skip(mod): def modifier_signature(op): return sorted([m for m in op["modifiers"].keys() if not should_skip(m)]) -def signature(op, dest_count, modifiers, sized = False): +def signature(op, modifiers, sized = False, no_dests = False): return ", ".join( ["bi_builder *b"] + (["unsigned bitsize"] if sized else []) + - ["bi_index dest{}".format(i) for i in range(dest_count)] + + ["bi_index dest{}".format(i) for i in range(0 if no_dests else op["dests"])] + ["bi_index src{}".format(i) for i in range(src_count(op))] + ["{} {}".format( "bool" if len(modifiers[T[0:-1]] if T[-1] in "0123" else modifiers[T]) == 2 else @@ -129,10 +135,10 @@ def signature(op, dest_count, modifiers, sized = False): T) for T in modifier_signature(op)] + ["uint32_t {}".format(imm) for imm in op["immediates"]]) -def arguments(op, dest_count, temp_dest = True): +def arguments(op, temp_dest = True): return ", ".join( ["b"] + - ["bi_temp(b->shader)" if temp_dest else 'dest{}'.format(i) for i in range(dest_count)] + + ["bi_temp(b->shader)" if temp_dest else 'dest{}'.format(i) for i in range(op["dests"])] + ["src{}".format(i) for i in range(src_count(op))] + modifier_signature(op) + op["immediates"]) diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c index 742bd1de3a1..2843b04d219 100644 --- a/src/panfrost/bifrost/bi_ra.c +++ b/src/panfrost/bifrost/bi_ra.c @@ -256,7 +256,7 @@ bi_spill_dest(bi_builder *b, bi_index index, bi_index temp, uint32_t offset, b->cursor = bi_after_clause(clause); /* setup FAU as [offset][0] */ - bi_instr *st = bi_store_to(b, channels * 32, bi_null(), temp, + bi_instr *st = bi_store(b, channels * 32, temp, bi_passthrough(BIFROST_SRC_FAU_LO), bi_passthrough(BIFROST_SRC_FAU_HI), BI_SEG_TL); diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index 3bb7b87dcb6..4104d00c47c 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -182,7 +182,7 @@ bi_lower_atom_c(bi_context *ctx, struct bi_clause_state *clause, struct { bi_instr *pinstr = tuple->add; bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); - bi_instr *atom_c = bi_atom_c_return_i32_to(&b, bi_null(), + bi_instr *atom_c = bi_atom_c_return_i32(&b, pinstr->src[1], pinstr->src[2], pinstr->src[0], pinstr->atom_opc); @@ -201,7 +201,7 @@ bi_lower_atom_c1(bi_context *ctx, struct bi_clause_state *clause, struct { bi_instr *pinstr = tuple->add; bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); - bi_instr *atom_c = bi_atom_c1_return_i32_to(&b, bi_null(), + bi_instr *atom_c = bi_atom_c1_return_i32(&b, pinstr->src[0], pinstr->src[1], pinstr->atom_opc); if (bi_is_null(pinstr->dest[0])) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index c7fde1ce20f..a15dd327a0f 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -65,7 +65,7 @@ static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list); static void bi_emit_jump(bi_builder *b, nir_jump_instr *instr) { - bi_instr *branch = bi_jump_to(b, bi_null(), bi_zero()); + bi_instr *branch = bi_jump(b, bi_zero()); switch (instr->type) { case nir_jump_break: @@ -528,7 +528,7 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) /* Jump back to the fragment shader, return address is stored * in r48 (see above). */ - bi_jump_to(b, bi_null(), bi_register(48)); + bi_jump(b, bi_register(48)); } } @@ -568,8 +568,8 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) unsigned nr = util_last_bit(nir_intrinsic_write_mask(instr)); assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0)); - bi_st_cvt_to(b, bi_null(), bi_src_index(&instr->src[0]), - address, bi_word(address, 1), bi_word(address, 2), + bi_st_cvt(b, bi_src_index(&instr->src[0]), address, + bi_word(address, 1), bi_word(address, 2), regfmt, nr - 1); } @@ -609,8 +609,7 @@ bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg) static void bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg) { - bi_store_to(b, instr->num_components * nir_src_bit_size(instr->src[0]), - bi_null(), + bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]), bi_src_index(&instr->src[0]), bi_src_index(&instr->src[1]), bi_addr_high(&instr->src[1]), seg); @@ -831,8 +830,7 @@ bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr) { bi_index addr = bi_emit_lea_image(b, instr); - bi_st_cvt_to(b, bi_null(), - bi_src_index(&instr->src[3]), + bi_st_cvt(b, bi_src_index(&instr->src[3]), addr, bi_word(addr, 1), bi_word(addr, 2), bi_reg_fmt_for_nir(nir_intrinsic_src_type(instr)), instr->num_components - 1); @@ -1014,7 +1012,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) case nir_intrinsic_control_barrier: assert(b->shader->stage != MESA_SHADER_FRAGMENT); - bi_barrier_to(b, bi_null()); + bi_barrier(b); break; case nir_intrinsic_shared_atomic_add: @@ -1122,13 +1120,12 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) if (sz == 16) src = bi_half(src, false); - bi_discard_f32_to(b, bi_null(), src, bi_zero(), BI_CMPF_NE); + bi_discard_f32(b, src, bi_zero(), BI_CMPF_NE); break; } case nir_intrinsic_discard: - bi_discard_f32_to(b, bi_null(), bi_zero(), bi_zero(), - BI_CMPF_EQ); + bi_discard_f32(b, bi_zero(), bi_zero(), BI_CMPF_EQ); break; case nir_intrinsic_load_ssbo_address: @@ -2071,9 +2068,9 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s, bi_index *t) { /* Compute max { |x|, |y|, |z| } */ - bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader), coord, + bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader), + bi_temp(b->shader), coord, bi_word(coord, 1), bi_word(coord, 2)); - cubeface->dest[1] = bi_temp(b->shader); /* Select coordinates */ @@ -2553,24 +2550,6 @@ emit_block(bi_context *ctx, nir_block *block) return ctx->current_block; } -/* Emits a direct branch based on a given condition. TODO: try to unwrap the - * condition to optimize */ - -static bi_instr * -bi_branch(bi_builder *b, nir_src *condition, bool invert) -{ - return bi_branchz_i32_to(b, bi_null(), bi_src_index(condition), - bi_zero(), invert ? BI_CMPF_EQ : BI_CMPF_NE); -} - -static bi_instr * -bi_jump(bi_builder *b, bi_block *target) -{ - bi_instr *I = bi_jump_to(b, bi_null(), bi_zero()); - I->branch_target = target; - return I; -} - static void emit_if(bi_context *ctx, nir_if *nif) { @@ -2578,7 +2557,8 @@ emit_if(bi_context *ctx, nir_if *nif) /* Speculatively emit the branch, but we can't fill it in until later */ bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); - bi_instr *then_branch = bi_branch(&_b, &nif->condition, true); + bi_instr *then_branch = bi_branchz_i32(&_b, + bi_src_index(&nif->condition), bi_zero(), BI_CMPF_EQ); /* Emit the two subblocks. */ bi_block *then_block = emit_cf_list(ctx, &nif->then_list); @@ -2604,7 +2584,8 @@ emit_if(bi_context *ctx, nir_if *nif) /* Emit a jump from the end of the then block to the end of the else */ _b.cursor = bi_after_block(end_then_block); - bi_instr *then_exit = bi_jump(&_b, ctx->after_block); + bi_instr *then_exit = bi_jump(&_b, bi_zero()); + then_exit->branch_target = ctx->after_block; pan_block_add_successor(&end_then_block->base, &then_exit->branch_target->base); pan_block_add_successor(&end_else_block->base, &ctx->after_block->base); /* fallthrough */ @@ -2632,7 +2613,8 @@ emit_loop(bi_context *ctx, nir_loop *nloop) /* Branch back to loop back */ bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); - bi_jump(&_b, ctx->continue_block); + bi_instr *I = bi_jump(&_b, bi_zero()); + I->branch_target = ctx->continue_block; pan_block_add_successor(&start_block->base, &ctx->continue_block->base); pan_block_add_successor(&ctx->current_block->base, &ctx->continue_block->base); diff --git a/src/panfrost/bifrost/bifrost_isa.py b/src/panfrost/bifrost/bifrost_isa.py index a2d7f882e39..ae97795f3ae 100644 --- a/src/panfrost/bifrost/bifrost_isa.py +++ b/src/panfrost/bifrost/bifrost_isa.py @@ -131,6 +131,7 @@ def parse_instruction(ins, include_pseudo): 'derived': [], 'staging': ins.attrib.get('staging', '').split('=')[0], 'staging_count': ins.attrib.get('staging', '=0').split('=')[1], + 'dests': int(ins.attrib.get('dests', '1')), 'unused': ins.attrib.get('unused', False), 'pseudo': ins.attrib.get('pseudo', False), 'message': ins.attrib.get('message', 'none'), @@ -238,6 +239,7 @@ def simplify_to_ir(ins): return { 'staging': ins['staging'], 'srcs': len(ins['srcs']), + 'dests': ins['dests'], 'modifiers': [[m[0][0], m[2]] for m in ins['modifiers']], 'immediates': [m[0] for m in ins['immediates']] } @@ -252,6 +254,7 @@ def combine_ir_variants(instructions, v): for s in variants: # Check consistency assert(s['srcs'] == variants[0]['srcs']) + assert(s['dests'] == variants[0]['dests']) assert(s['immediates'] == variants[0]['immediates']) assert(s['staging'] == variants[0]['staging']) @@ -265,6 +268,7 @@ def combine_ir_variants(instructions, v): # modifiers return { 'srcs': variants[0]['srcs'], + 'dests': variants[0]['dests'], 'staging': variants[0]['staging'], 'immediates': sorted(variants[0]['immediates']), 'modifiers': { k: modifiers[k] for k in modifiers }