mirror of https://gitlab.freedesktop.org/mesa/mesa
303 lines
8.4 KiB
C
303 lines
8.4 KiB
C
/*
|
|
* Copyright (C) 2022 Collabora Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*
|
|
* Authors (Collabora):
|
|
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
|
*/
|
|
|
|
/* Bottom-up local scheduler to reduce register pressure */
|
|
|
|
#include "util/dag.h"
|
|
#include "compiler.h"
|
|
|
|
struct sched_ctx {
|
|
/* Dependency graph */
|
|
struct dag *dag;
|
|
|
|
/* Live set */
|
|
BITSET_WORD *live;
|
|
};
|
|
|
|
struct sched_node {
|
|
struct dag_node dag;
|
|
|
|
/* Instruction this node represents */
|
|
bi_instr *instr;
|
|
};
|
|
|
|
static void
|
|
add_dep(struct sched_node *a, struct sched_node *b)
|
|
{
|
|
if (a && b)
|
|
dag_add_edge(&a->dag, &b->dag, 0);
|
|
}
|
|
|
|
static struct dag *
|
|
create_dag(bi_context *ctx, bi_block *block, void *memctx)
|
|
{
|
|
struct dag *dag = dag_create(ctx);
|
|
|
|
struct sched_node **last_write =
|
|
calloc(ctx->ssa_alloc, sizeof(struct sched_node *));
|
|
struct sched_node *coverage = NULL;
|
|
struct sched_node *preload = NULL;
|
|
|
|
/* Last memory load, to serialize stores against */
|
|
struct sched_node *memory_load = NULL;
|
|
|
|
/* Last memory store, to serialize loads and stores against */
|
|
struct sched_node *memory_store = NULL;
|
|
|
|
bi_foreach_instr_in_block(block, I) {
|
|
/* Leave branches at the end */
|
|
if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
|
|
break;
|
|
|
|
assert(I->branch_target == NULL);
|
|
|
|
struct sched_node *node = rzalloc(memctx, struct sched_node);
|
|
node->instr = I;
|
|
dag_init_node(dag, &node->dag);
|
|
|
|
/* Reads depend on writes, no other hazards in SSA */
|
|
bi_foreach_ssa_src(I, s)
|
|
add_dep(node, last_write[I->src[s].value]);
|
|
|
|
bi_foreach_dest(I, d)
|
|
last_write[I->dest[d].value] = node;
|
|
|
|
switch (bi_opcode_props[I->op].message) {
|
|
case BIFROST_MESSAGE_LOAD:
|
|
/* Regular memory loads needs to be serialized against
|
|
* other memory access. However, UBO memory is read-only
|
|
* so it can be moved around freely.
|
|
*/
|
|
if (I->seg != BI_SEG_UBO) {
|
|
add_dep(node, memory_store);
|
|
memory_load = node;
|
|
}
|
|
|
|
break;
|
|
|
|
case BIFROST_MESSAGE_ATTRIBUTE:
|
|
/* Regular attribute loads can be reordered, but
|
|
* writeable attributes can't be. Our one use of
|
|
* writeable attributes are images.
|
|
*/
|
|
if ((I->op == BI_OPCODE_LD_TEX) || (I->op == BI_OPCODE_LD_TEX_IMM) ||
|
|
(I->op == BI_OPCODE_LD_ATTR_TEX)) {
|
|
add_dep(node, memory_store);
|
|
memory_load = node;
|
|
}
|
|
|
|
break;
|
|
|
|
case BIFROST_MESSAGE_STORE:
|
|
assert(I->seg != BI_SEG_UBO);
|
|
add_dep(node, memory_load);
|
|
add_dep(node, memory_store);
|
|
memory_store = node;
|
|
break;
|
|
|
|
case BIFROST_MESSAGE_ATOMIC:
|
|
case BIFROST_MESSAGE_BARRIER:
|
|
add_dep(node, memory_load);
|
|
add_dep(node, memory_store);
|
|
memory_load = node;
|
|
memory_store = node;
|
|
break;
|
|
|
|
case BIFROST_MESSAGE_BLEND:
|
|
case BIFROST_MESSAGE_Z_STENCIL:
|
|
case BIFROST_MESSAGE_TILE:
|
|
add_dep(node, coverage);
|
|
coverage = node;
|
|
break;
|
|
|
|
case BIFROST_MESSAGE_ATEST:
|
|
/* ATEST signals the end of shader side effects */
|
|
add_dep(node, memory_store);
|
|
memory_store = node;
|
|
|
|
/* ATEST also updates coverage */
|
|
add_dep(node, coverage);
|
|
coverage = node;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
add_dep(node, preload);
|
|
|
|
if (I->op == BI_OPCODE_DISCARD_F32) {
|
|
/* Serialize against ATEST */
|
|
add_dep(node, coverage);
|
|
coverage = node;
|
|
|
|
/* Also serialize against memory and barriers */
|
|
add_dep(node, memory_load);
|
|
add_dep(node, memory_store);
|
|
memory_load = node;
|
|
memory_store = node;
|
|
} else if ((I->op == BI_OPCODE_PHI) ||
|
|
(I->op == BI_OPCODE_MOV_I32 &&
|
|
I->src[0].type == BI_INDEX_REGISTER)) {
|
|
preload = node;
|
|
}
|
|
}
|
|
|
|
free(last_write);
|
|
|
|
return dag;
|
|
}
|
|
|
|
/*
|
|
* Calculate the change in register pressure from scheduling a given
|
|
* instruction. Equivalently, calculate the difference in the number of live
|
|
* registers before and after the instruction, given the live set after the
|
|
* instruction. This calculation follows immediately from the dataflow
|
|
* definition of liveness:
|
|
*
|
|
* live_in = (live_out - KILL) + GEN
|
|
*/
|
|
static signed
|
|
calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
|
|
{
|
|
signed delta = 0;
|
|
|
|
/* Destinations must be unique */
|
|
bi_foreach_dest(I, d) {
|
|
if (BITSET_TEST(live, I->dest[d].value))
|
|
delta -= bi_count_write_registers(I, d);
|
|
}
|
|
|
|
bi_foreach_ssa_src(I, src) {
|
|
/* Filter duplicates */
|
|
bool dupe = false;
|
|
|
|
for (unsigned i = 0; i < src; ++i) {
|
|
if (bi_is_equiv(I->src[i], I->src[src])) {
|
|
dupe = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!dupe && !BITSET_TEST(live, I->src[src].value))
|
|
delta += bi_count_read_registers(I, src);
|
|
}
|
|
|
|
return delta;
|
|
}
|
|
|
|
/*
|
|
* Choose the next instruction, bottom-up. For now we use a simple greedy
|
|
* heuristic: choose the instruction that has the best effect on liveness.
|
|
*/
|
|
static struct sched_node *
|
|
choose_instr(struct sched_ctx *s)
|
|
{
|
|
int32_t min_delta = INT32_MAX;
|
|
struct sched_node *best = NULL;
|
|
|
|
list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
|
|
int32_t delta = calculate_pressure_delta(n->instr, s->live);
|
|
|
|
if (delta < min_delta) {
|
|
best = n;
|
|
min_delta = delta;
|
|
}
|
|
}
|
|
|
|
return best;
|
|
}
|
|
|
|
static void
|
|
pressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s)
|
|
{
|
|
/* off by a constant, that's ok */
|
|
signed pressure = 0;
|
|
signed orig_max_pressure = 0;
|
|
unsigned nr_ins = 0;
|
|
|
|
memcpy(s->live, block->ssa_live_out,
|
|
BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
|
|
|
|
bi_foreach_instr_in_block_rev(block, I) {
|
|
pressure += calculate_pressure_delta(I, s->live);
|
|
orig_max_pressure = MAX2(pressure, orig_max_pressure);
|
|
bi_liveness_ins_update_ssa(s->live, I);
|
|
nr_ins++;
|
|
}
|
|
|
|
memcpy(s->live, block->ssa_live_out,
|
|
BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
|
|
|
|
/* off by a constant, that's ok */
|
|
signed max_pressure = 0;
|
|
pressure = 0;
|
|
|
|
struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
|
|
nr_ins = 0;
|
|
|
|
while (!list_is_empty(&s->dag->heads)) {
|
|
struct sched_node *node = choose_instr(s);
|
|
pressure += calculate_pressure_delta(node->instr, s->live);
|
|
max_pressure = MAX2(pressure, max_pressure);
|
|
dag_prune_head(s->dag, &node->dag);
|
|
|
|
schedule[nr_ins++] = node;
|
|
bi_liveness_ins_update_ssa(s->live, node->instr);
|
|
}
|
|
|
|
/* Bail if it looks like it's worse */
|
|
if (max_pressure >= orig_max_pressure) {
|
|
free(schedule);
|
|
return;
|
|
}
|
|
|
|
/* Apply the schedule */
|
|
for (unsigned i = 0; i < nr_ins; ++i) {
|
|
bi_remove_instruction(schedule[i]->instr);
|
|
list_add(&schedule[i]->instr->link, &block->instructions);
|
|
}
|
|
|
|
free(schedule);
|
|
}
|
|
|
|
void
|
|
bi_pressure_schedule(bi_context *ctx)
|
|
{
|
|
bi_compute_liveness_ssa(ctx);
|
|
void *memctx = ralloc_context(ctx);
|
|
BITSET_WORD *live =
|
|
ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));
|
|
|
|
bi_foreach_block(ctx, block) {
|
|
struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx),
|
|
.live = live};
|
|
|
|
pressure_schedule_block(ctx, block, &sctx);
|
|
}
|
|
|
|
ralloc_free(memctx);
|
|
}
|