pan/bi: Use canonical terminology for tuple
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8723>
This commit is contained in:
parent
f74dda9377
commit
ce2ef3dad6
|
@ -23,31 +23,31 @@
|
|||
|
||||
#include "compiler.h"
|
||||
|
||||
/* The scheduler packs multiple instructions into a clause (grouped as bundle),
|
||||
/* The scheduler packs multiple instructions into a clause (grouped as tuple),
|
||||
* and the packing code takes in a clause and emits it to the wire. During
|
||||
* scheduling, we need to lay out the instructions (bundles) and constants
|
||||
* scheduling, we need to lay out the instructions (tuples) and constants
|
||||
* within the clause so constraints can be resolved during scheduling instead
|
||||
* of failing packing. These routines will help building clauses from
|
||||
* instructions so the scheduler can focus on the high-level algorithm, and
|
||||
* manipulating clause layouts.
|
||||
*/
|
||||
|
||||
/* Helper to see if a bundle can be inserted. We must satisfy the invariant:
|
||||
/* Helper to see if a tuple can be inserted. We must satisfy the invariant:
|
||||
*
|
||||
* constant_count + bundle_count <= 13
|
||||
* constant_count + tuple_count <= 13
|
||||
*
|
||||
* ...which is equivalent to the clause ending up with 8 or fewer quadwords.
|
||||
* Inserting a bundle increases bundle_count by one, and if it reads a unique
|
||||
* Inserting a tuple increases tuple_count by one, and if it reads a unique
|
||||
* constant, it increases constant_count by one.
|
||||
*/
|
||||
|
||||
bool
|
||||
bi_can_insert_bundle(bi_clause *clause, bool constant)
|
||||
bi_can_insert_tuple(bi_clause *clause, bool constant)
|
||||
{
|
||||
unsigned constant_count = clause->constant_count + (constant ? 1 : 0);
|
||||
unsigned bundle_count = clause->bundle_count + 1;
|
||||
unsigned tuple_count = clause->tuple_count + 1;
|
||||
|
||||
return (constant_count + bundle_count) <= 13;
|
||||
return (constant_count + tuple_count) <= 13;
|
||||
}
|
||||
|
||||
/* Helper to calculate the number of quadwords in a clause. This is a function
|
||||
|
@ -79,7 +79,7 @@ bi_can_insert_bundle(bi_clause *clause, bool constant)
|
|||
unsigned
|
||||
bi_clause_quadwords(bi_clause *clause)
|
||||
{
|
||||
unsigned X = clause->bundle_count;
|
||||
unsigned X = clause->tuple_count;
|
||||
unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0);
|
||||
|
||||
unsigned constants = clause->constant_count;
|
||||
|
|
|
@ -56,8 +56,8 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd
|
|||
}
|
||||
|
||||
/* The uniform/constant slot allows loading a contiguous 64-bit immediate or
|
||||
* pushed uniform per bundle. Figure out which one we need in the bundle (the
|
||||
* scheduler needs to ensure we only have one type per bundle), validate
|
||||
* pushed uniform per tuple. Figure out which one we need in the tuple (the
|
||||
* scheduler needs to ensure we only have one type per tuple), validate
|
||||
* everything, and rewrite away the register/uniform indices to use 3-bit
|
||||
* sources directly. */
|
||||
|
||||
|
@ -181,12 +181,12 @@ bi_assign_fau_idx_single(bi_registers *regs,
|
|||
|
||||
static void
|
||||
bi_assign_fau_idx(bi_clause *clause,
|
||||
bi_bundle *bundle)
|
||||
bi_tuple *tuple)
|
||||
{
|
||||
bool assigned =
|
||||
bi_assign_fau_idx_single(&bundle->regs, clause, bundle->fma, false, true);
|
||||
bi_assign_fau_idx_single(&tuple->regs, clause, tuple->fma, false, true);
|
||||
|
||||
bi_assign_fau_idx_single(&bundle->regs, clause, bundle->add, assigned, false);
|
||||
bi_assign_fau_idx_single(&tuple->regs, clause, tuple->add, assigned, false);
|
||||
}
|
||||
|
||||
/* Assigns a slot for reading, before anything is written */
|
||||
|
@ -228,7 +228,7 @@ bi_assign_slot_read(bi_registers *regs, bi_index src)
|
|||
}
|
||||
|
||||
static bi_registers
|
||||
bi_assign_slots(bi_bundle *now, bi_bundle *prev)
|
||||
bi_assign_slots(bi_tuple *now, bi_tuple *prev)
|
||||
{
|
||||
/* We assign slots for the main register mechanism. Special ops
|
||||
* use the data registers, which has its own mechanism entirely
|
||||
|
@ -388,7 +388,7 @@ bi_pack_registers(bi_registers regs)
|
|||
return packed;
|
||||
}
|
||||
|
||||
struct bi_packed_bundle {
|
||||
struct bi_packed_tuple {
|
||||
uint64_t lo;
|
||||
uint64_t hi;
|
||||
};
|
||||
|
@ -411,9 +411,9 @@ bi_flip_slots(bi_registers *regs)
|
|||
* doesn't have to worry about this while we're just packing singletons */
|
||||
|
||||
static void
|
||||
bi_lower_cubeface2(bi_context *ctx, bi_bundle *bundle)
|
||||
bi_lower_cubeface2(bi_context *ctx, bi_tuple *tuple)
|
||||
{
|
||||
bi_instr *old = bundle->add;
|
||||
bi_instr *old = tuple->add;
|
||||
|
||||
/* Filter for +CUBEFACE2 */
|
||||
if (!old || old->op != BI_OPCODE_CUBEFACE2)
|
||||
|
@ -421,7 +421,7 @@ bi_lower_cubeface2(bi_context *ctx, bi_bundle *bundle)
|
|||
|
||||
/* This won't be used once we emit non-singletons, for now this is just
|
||||
* a fact of our scheduler and allows us to clobber FMA */
|
||||
assert(!bundle->fma);
|
||||
assert(!tuple->fma);
|
||||
|
||||
/* Construct an FMA op */
|
||||
bi_instr *new = rzalloc(ctx, bi_instr);
|
||||
|
@ -433,7 +433,7 @@ bi_lower_cubeface2(bi_context *ctx, bi_bundle *bundle)
|
|||
|
||||
/* Emit the instruction */
|
||||
list_addtail(&new->link, &old->link);
|
||||
bundle->fma = new;
|
||||
tuple->fma = new;
|
||||
|
||||
/* Now replace the sources of the CUBEFACE2 with a single passthrough
|
||||
* from the CUBEFACE1 (and a side-channel) */
|
||||
|
@ -474,33 +474,33 @@ bi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s)
|
|||
}
|
||||
}
|
||||
|
||||
static struct bi_packed_bundle
|
||||
bi_pack_bundle(bi_clause *clause, bi_bundle *bundle, bi_bundle *prev, bool first_bundle, gl_shader_stage stage)
|
||||
static struct bi_packed_tuple
|
||||
bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage)
|
||||
{
|
||||
bi_assign_slots(bundle, prev);
|
||||
bi_assign_fau_idx(clause, bundle);
|
||||
bundle->regs.first_instruction = first_bundle;
|
||||
bi_assign_slots(tuple, prev);
|
||||
bi_assign_fau_idx(clause, tuple);
|
||||
tuple->regs.first_instruction = first_tuple;
|
||||
|
||||
bi_flip_slots(&bundle->regs);
|
||||
bi_flip_slots(&tuple->regs);
|
||||
|
||||
bool sr_read = bundle->add &&
|
||||
bi_opcode_props[(bundle->add)->op].sr_read;
|
||||
bool sr_read = tuple->add &&
|
||||
bi_opcode_props[(tuple->add)->op].sr_read;
|
||||
|
||||
uint64_t reg = bi_pack_registers(bundle->regs);
|
||||
uint64_t fma = bi_pack_fma(bundle->fma,
|
||||
bi_get_src_new(bundle->fma, &bundle->regs, 0),
|
||||
bi_get_src_new(bundle->fma, &bundle->regs, 1),
|
||||
bi_get_src_new(bundle->fma, &bundle->regs, 2),
|
||||
bi_get_src_new(bundle->fma, &bundle->regs, 3));
|
||||
uint64_t reg = bi_pack_registers(tuple->regs);
|
||||
uint64_t fma = bi_pack_fma(tuple->fma,
|
||||
bi_get_src_new(tuple->fma, &tuple->regs, 0),
|
||||
bi_get_src_new(tuple->fma, &tuple->regs, 1),
|
||||
bi_get_src_new(tuple->fma, &tuple->regs, 2),
|
||||
bi_get_src_new(tuple->fma, &tuple->regs, 3));
|
||||
|
||||
uint64_t add = bi_pack_add(bundle->add,
|
||||
bi_get_src_new(bundle->add, &bundle->regs, sr_read + 0),
|
||||
bi_get_src_new(bundle->add, &bundle->regs, sr_read + 1),
|
||||
bi_get_src_new(bundle->add, &bundle->regs, sr_read + 2),
|
||||
uint64_t add = bi_pack_add(tuple->add,
|
||||
bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0),
|
||||
bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1),
|
||||
bi_get_src_new(tuple->add, &tuple->regs, sr_read + 2),
|
||||
0);
|
||||
|
||||
if (bundle->add) {
|
||||
bi_instr *add = bundle->add;
|
||||
if (tuple->add) {
|
||||
bi_instr *add = tuple->add;
|
||||
|
||||
bool sr_write = bi_opcode_props[add->op].sr_write;
|
||||
|
||||
|
@ -516,7 +516,7 @@ bi_pack_bundle(bi_clause *clause, bi_bundle *bundle, bi_bundle *prev, bool first
|
|||
}
|
||||
}
|
||||
|
||||
struct bi_packed_bundle packed = {
|
||||
struct bi_packed_tuple packed = {
|
||||
.lo = reg | (fma << 35) | ((add & 0b111111) << 58),
|
||||
.hi = add >> 6
|
||||
};
|
||||
|
@ -551,12 +551,12 @@ bi_pack_constants(bi_context *ctx, bi_clause *clause,
|
|||
bool branches = clause->branch_constant && done;
|
||||
|
||||
/* TODO: Pos */
|
||||
assert(index == 0 && clause->bundle_count == 1);
|
||||
assert(index == 0 && clause->tuple_count == 1);
|
||||
assert(only);
|
||||
|
||||
/* Compute branch offset instead of a dummy 0 */
|
||||
if (branches) {
|
||||
bi_instr *br = clause->bundles[clause->bundle_count - 1].add;
|
||||
bi_instr *br = clause->tuples[clause->tuple_count - 1].add;
|
||||
assert(br && br->branch_target);
|
||||
|
||||
/* Put it in the high place */
|
||||
|
@ -609,10 +609,10 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
|
|||
bool tdd)
|
||||
{
|
||||
/* TODO After the deadline lowering */
|
||||
bi_lower_cubeface2(ctx, &clause->bundles[0]);
|
||||
bi_lower_cubeface2(ctx, &clause->tuples[0]);
|
||||
|
||||
struct bi_packed_bundle ins_1 = bi_pack_bundle(clause, &clause->bundles[0], &clause->bundles[0], true, stage);
|
||||
assert(clause->bundle_count == 1);
|
||||
struct bi_packed_tuple ins_1 = bi_pack_tuple(clause, &clause->tuples[0], &clause->tuples[0], true, stage);
|
||||
assert(clause->tuple_count == 1);
|
||||
|
||||
/* State for packing constants throughout */
|
||||
unsigned constant_index = 0;
|
||||
|
@ -657,14 +657,14 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
|
|||
if (ctx->is_blend)
|
||||
return;
|
||||
|
||||
const bi_bundle *bundle = &clause->bundles[clause->bundle_count - 1];
|
||||
const bi_instr *ins = bundle->add;
|
||||
const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1];
|
||||
const bi_instr *ins = tuple->add;
|
||||
|
||||
if (!ins || ins->op != BI_OPCODE_BLEND)
|
||||
return;
|
||||
|
||||
|
||||
unsigned loc = bundle->regs.fau_idx - BIR_FAU_BLEND_0;
|
||||
unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;
|
||||
assert(loc < ARRAY_SIZE(ctx->blend_ret_offsets));
|
||||
assert(!ctx->blend_ret_offsets[loc]);
|
||||
ctx->blend_ret_offsets[loc] =
|
||||
|
|
|
@ -65,9 +65,9 @@ bi_print_slots(bi_registers *regs, FILE *fp)
|
|||
}
|
||||
|
||||
void
|
||||
bi_print_bundle(bi_bundle *bundle, FILE *fp)
|
||||
bi_print_tuple(bi_tuple *tuple, FILE *fp)
|
||||
{
|
||||
bi_instr *ins[2] = { bundle->fma, bundle->add };
|
||||
bi_instr *ins[2] = { tuple->fma, tuple->add };
|
||||
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
if (ins[i])
|
||||
|
@ -103,8 +103,8 @@ bi_print_clause(bi_clause *clause, FILE *fp)
|
|||
|
||||
fprintf(fp, "\n");
|
||||
|
||||
for (unsigned i = 0; i < clause->bundle_count; ++i)
|
||||
bi_print_bundle(&clause->bundles[i], fp);
|
||||
for (unsigned i = 0; i < clause->tuple_count; ++i)
|
||||
bi_print_tuple(&clause->tuples[i], fp);
|
||||
|
||||
if (clause->constant_count) {
|
||||
for (unsigned i = 0; i < clause->constant_count; ++i)
|
||||
|
|
|
@ -82,16 +82,16 @@ bi_singleton(void *memctx, bi_instr *ins,
|
|||
bool osrb)
|
||||
{
|
||||
bi_clause *u = rzalloc(memctx, bi_clause);
|
||||
u->bundle_count = 1;
|
||||
u->tuple_count = 1;
|
||||
|
||||
ASSERTED bool can_fma = bi_opcode_props[ins->op].fma;
|
||||
bool can_add = bi_opcode_props[ins->op].add;
|
||||
assert(can_fma || can_add);
|
||||
|
||||
if (can_add)
|
||||
u->bundles[0].add = ins;
|
||||
u->tuples[0].add = ins;
|
||||
else
|
||||
u->bundles[0].fma = ins;
|
||||
u->tuples[0].fma = ins;
|
||||
|
||||
u->scoreboard_id = scoreboard_id;
|
||||
u->staging_barrier = osrb;
|
||||
|
@ -115,7 +115,7 @@ bi_singleton(void *memctx, bi_instr *ins,
|
|||
unsigned value = ins->src[s].value;
|
||||
|
||||
/* Allow fast zero */
|
||||
if (value == 0 && u->bundles[0].fma) continue;
|
||||
if (value == 0 && u->tuples[0].fma) continue;
|
||||
|
||||
if (constant_count == 0) {
|
||||
combined_constant = ins->src[s].value;
|
||||
|
|
|
@ -2041,13 +2041,13 @@ bi_print_stats(bi_context *ctx, FILE *fp)
|
|||
|
||||
bi_foreach_clause_in_block(block, clause) {
|
||||
nr_clauses++;
|
||||
nr_tuples += clause->bundle_count;
|
||||
nr_tuples += clause->tuple_count;
|
||||
|
||||
for (unsigned i = 0; i < clause->bundle_count; ++i) {
|
||||
if (clause->bundles[i].fma)
|
||||
for (unsigned i = 0; i < clause->tuple_count; ++i) {
|
||||
if (clause->tuples[i].fma)
|
||||
nr_ins++;
|
||||
|
||||
if (clause->bundles[i].add)
|
||||
if (clause->tuples[i].add)
|
||||
nr_ins++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -377,7 +377,7 @@ typedef struct {
|
|||
};
|
||||
} bi_instr;
|
||||
|
||||
/* Represents the assignment of slots for a given bi_bundle */
|
||||
/* Represents the assignment of slots for a given bi_tuple */
|
||||
|
||||
typedef struct {
|
||||
/* Register to assign to each slot */
|
||||
|
@ -396,9 +396,9 @@ typedef struct {
|
|||
bool first_instruction;
|
||||
} bi_registers;
|
||||
|
||||
/* A bi_bundle contains two paired instruction pointers. If a slot is unfilled,
|
||||
/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
|
||||
* leave it NULL; the emitter will fill in a nop. Instructions reference
|
||||
* registers via slots which are assigned per bundle.
|
||||
* registers via slots which are assigned per tuple.
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
|
@ -406,7 +406,7 @@ typedef struct {
|
|||
bi_registers regs;
|
||||
bi_instr *fma;
|
||||
bi_instr *add;
|
||||
} bi_bundle;
|
||||
} bi_tuple;
|
||||
|
||||
struct bi_block;
|
||||
|
||||
|
@ -416,11 +416,11 @@ typedef struct {
|
|||
/* Link back up for branch calculations */
|
||||
struct bi_block *block;
|
||||
|
||||
/* A clause can have 8 instructions in bundled FMA/ADD sense, so there
|
||||
* can be 8 bundles. */
|
||||
/* A clause can have 8 instructions in tupled FMA/ADD sense, so there
|
||||
* can be 8 tuples. */
|
||||
|
||||
unsigned bundle_count;
|
||||
bi_bundle bundles[8];
|
||||
unsigned tuple_count;
|
||||
bi_tuple tuples[8];
|
||||
|
||||
/* For scoreboarding -- the clause ID (this is not globally unique!)
|
||||
* and its dependencies in terms of other clauses, computed during
|
||||
|
@ -446,9 +446,9 @@ typedef struct {
|
|||
|
||||
/* Constants read by this clause. ISA limit. Must satisfy:
|
||||
*
|
||||
* constant_count + bundle_count <= 13
|
||||
* constant_count + tuple_count <= 13
|
||||
*
|
||||
* Also implicitly constant_count <= bundle_count since a bundle only
|
||||
* Also implicitly constant_count <= tuple_count since a tuple only
|
||||
* reads a single constant.
|
||||
*/
|
||||
uint64_t constants[8];
|
||||
|
@ -713,7 +713,7 @@ bi_clause * bi_next_clause(bi_context *ctx, pan_block *block, bi_clause *clause)
|
|||
|
||||
void bi_print_instr(bi_instr *I, FILE *fp);
|
||||
void bi_print_slots(bi_registers *regs, FILE *fp);
|
||||
void bi_print_bundle(bi_bundle *bundle, FILE *fp);
|
||||
void bi_print_tuple(bi_tuple *tuple, FILE *fp);
|
||||
void bi_print_clause(bi_clause *clause, FILE *fp);
|
||||
void bi_print_block(bi_block *block, FILE *fp);
|
||||
void bi_print_shader(bi_context *ctx, FILE *fp);
|
||||
|
@ -742,7 +742,7 @@ void bi_invalidate_liveness(bi_context *ctx);
|
|||
|
||||
/* Layout */
|
||||
|
||||
bool bi_can_insert_bundle(bi_clause *clause, bool constant);
|
||||
bool bi_can_insert_tuple(bi_clause *clause, bool constant);
|
||||
unsigned bi_clause_quadwords(bi_clause *clause);
|
||||
signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
|
||||
|
||||
|
@ -808,15 +808,15 @@ bi_after_instr(bi_instr *instr)
|
|||
static inline bi_instr *
|
||||
bi_first_instr_in_clause(bi_clause *clause)
|
||||
{
|
||||
bi_bundle bundle = clause->bundles[0];
|
||||
return bundle.fma ?: bundle.add;
|
||||
bi_tuple tuple = clause->tuples[0];
|
||||
return tuple.fma ?: tuple.add;
|
||||
}
|
||||
|
||||
static inline bi_instr *
|
||||
bi_last_instr_in_clause(bi_clause *clause)
|
||||
{
|
||||
bi_bundle bundle = clause->bundles[clause->bundle_count - 1];
|
||||
return bundle.add ?: bundle.fma;
|
||||
bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
|
||||
return tuple.add ?: tuple.fma;
|
||||
}
|
||||
|
||||
/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
|
||||
|
|
Loading…
Reference in New Issue