freedreno/ir3: split out delay helpers
We're going to want these for a post-RA sched pass as well, and also in order to split nop stuffing out into its own pass. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3569>
This commit is contained in:
parent
54c795f829
commit
c803c662f9
|
@ -1113,10 +1113,16 @@ static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
|
||||||
void ir3_print(struct ir3 *ir);
|
void ir3_print(struct ir3 *ir);
|
||||||
void ir3_print_instr(struct ir3_instruction *instr);
|
void ir3_print_instr(struct ir3_instruction *instr);
|
||||||
|
|
||||||
/* depth calculation: */
|
/* delay calculation: */
|
||||||
struct ir3_shader_variant;
|
|
||||||
int ir3_delayslots(struct ir3_instruction *assigner,
|
int ir3_delayslots(struct ir3_instruction *assigner,
|
||||||
struct ir3_instruction *consumer, unsigned n);
|
struct ir3_instruction *consumer, unsigned n);
|
||||||
|
unsigned ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
|
||||||
|
unsigned maxd, bool pred);
|
||||||
|
unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
|
||||||
|
bool soft, bool pred);
|
||||||
|
|
||||||
|
/* depth calculation: */
|
||||||
|
struct ir3_shader_variant;
|
||||||
void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
|
void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
|
||||||
void ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so);
|
void ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,337 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2019 Google, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Rob Clark <robclark@freedesktop.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ir3.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helpers to figure out the necessary delay slots between instructions. Used
|
||||||
|
* both in scheduling pass(es) and the final pass to insert any required nop's
|
||||||
|
* so that the shader program is valid.
|
||||||
|
*
|
||||||
|
* Note that this needs to work both pre and post RA, so we can't assume ssa
|
||||||
|
* src iterators work.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* generally don't count false dependencies, since this can just be
|
||||||
|
* something like a barrier, or SSBO store. The exception is array
|
||||||
|
* dependencies if the assigner is an array write and the consumer
|
||||||
|
* reads the same array.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
ignore_dep(struct ir3_instruction *assigner,
|
||||||
|
struct ir3_instruction *consumer, unsigned n)
|
||||||
|
{
|
||||||
|
if (!__is_false_dep(consumer, n))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) {
|
||||||
|
struct ir3_register *dst = assigner->regs[0];
|
||||||
|
struct ir3_register *src;
|
||||||
|
|
||||||
|
debug_assert(dst->flags & IR3_REG_ARRAY);
|
||||||
|
|
||||||
|
foreach_src (src, consumer) {
|
||||||
|
if ((src->flags & IR3_REG_ARRAY) &&
|
||||||
|
(dst->array.id == src->array.id)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* calculate required # of delay slots between the instruction that
|
||||||
|
* assigns a value and the one that consumes
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
ir3_delayslots(struct ir3_instruction *assigner,
|
||||||
|
struct ir3_instruction *consumer, unsigned n)
|
||||||
|
{
|
||||||
|
if (ignore_dep(assigner, consumer, n))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
|
||||||
|
* alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
|
||||||
|
* handled with sync bits
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (is_meta(assigner) || is_meta(consumer))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (writes_addr(assigner))
|
||||||
|
return 6;
|
||||||
|
|
||||||
|
/* handled via sync flags: */
|
||||||
|
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* assigner must be alu: */
|
||||||
|
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
|
||||||
|
is_mem(consumer)) {
|
||||||
|
return 6;
|
||||||
|
} else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
|
||||||
|
(n == 3)) {
|
||||||
|
/* special case, 3rd src to cat3 not required on first cycle */
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
count_instruction(struct ir3_instruction *n)
|
||||||
|
{
|
||||||
|
/* NOTE: don't count branch/jump since we don't know yet if they will
|
||||||
|
* be eliminated later in resolve_jumps().. really should do that
|
||||||
|
* earlier so we don't have this constraint.
|
||||||
|
*/
|
||||||
|
return is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @block: the block to search in, starting from end; in first pass,
|
||||||
|
* this will be the block the instruction would be inserted into
|
||||||
|
* (but has not yet, ie. it only contains already scheduled
|
||||||
|
* instructions). For intra-block scheduling (second pass), this
|
||||||
|
* would be one of the predecessor blocks.
|
||||||
|
* @instr: the instruction to search for
|
||||||
|
* @maxd: max distance, bail after searching this # of instruction
|
||||||
|
* slots, since it means the instruction we are looking for is
|
||||||
|
* far enough away
|
||||||
|
* @pred: if true, recursively search into predecessor blocks to
|
||||||
|
* find the worst case (shortest) distance (only possible after
|
||||||
|
* individual blocks are all scheduled)
|
||||||
|
*/
|
||||||
|
unsigned
|
||||||
|
ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
|
||||||
|
unsigned maxd, bool pred)
|
||||||
|
{
|
||||||
|
unsigned d = 0;
|
||||||
|
|
||||||
|
/* Note that this relies on incrementally building up the block's
|
||||||
|
* instruction list.. but this is how scheduling and nopsched
|
||||||
|
* work.
|
||||||
|
*/
|
||||||
|
foreach_instr_rev (n, &block->instr_list) {
|
||||||
|
if ((n == instr) || (d >= maxd))
|
||||||
|
return MIN2(maxd, d + n->nop);
|
||||||
|
if (count_instruction(n))
|
||||||
|
d = MIN2(maxd, d + 1 + n->repeat + n->nop);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if coming from a predecessor block, assume it is assigned far
|
||||||
|
* enough away.. we'll fix up later.
|
||||||
|
*/
|
||||||
|
if (!pred)
|
||||||
|
return maxd;
|
||||||
|
|
||||||
|
if (pred && (block->data != block)) {
|
||||||
|
/* Search into predecessor blocks, finding the one with the
|
||||||
|
* shortest distance, since that will be the worst case
|
||||||
|
*/
|
||||||
|
unsigned min = maxd - d;
|
||||||
|
|
||||||
|
/* (ab)use block->data to prevent recursion: */
|
||||||
|
block->data = block;
|
||||||
|
|
||||||
|
set_foreach (block->predecessors, entry) {
|
||||||
|
struct ir3_block *pred = (struct ir3_block *)entry->key;
|
||||||
|
unsigned n;
|
||||||
|
|
||||||
|
n = ir3_distance(pred, instr, min, pred);
|
||||||
|
|
||||||
|
min = MIN2(min, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
block->data = NULL;
|
||||||
|
d += min;
|
||||||
|
}
|
||||||
|
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* calculate delay for specified src: */
|
||||||
|
static unsigned
|
||||||
|
delay_calc_srcn(struct ir3_block *block,
|
||||||
|
struct ir3_instruction *assigner,
|
||||||
|
struct ir3_instruction *consumer,
|
||||||
|
unsigned srcn, bool soft, bool pred)
|
||||||
|
{
|
||||||
|
unsigned delay = 0;
|
||||||
|
|
||||||
|
if (is_meta(assigner)) {
|
||||||
|
struct ir3_register *src;
|
||||||
|
foreach_src (src, assigner) {
|
||||||
|
unsigned d;
|
||||||
|
|
||||||
|
if (!src->instr)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
d = delay_calc_srcn(block, src->instr, consumer, srcn, soft, pred);
|
||||||
|
delay = MAX2(delay, d);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (soft) {
|
||||||
|
if (is_sfu(assigner)) {
|
||||||
|
delay = 4;
|
||||||
|
} else {
|
||||||
|
delay = ir3_delayslots(assigner, consumer, srcn);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
delay = ir3_delayslots(assigner, consumer, srcn);
|
||||||
|
}
|
||||||
|
delay -= ir3_distance(block, assigner, delay, pred);
|
||||||
|
}
|
||||||
|
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ir3_instruction *
|
||||||
|
find_array_write(struct ir3_block *block, unsigned array_id, unsigned maxd)
|
||||||
|
{
|
||||||
|
unsigned d = 0;
|
||||||
|
|
||||||
|
/* Note that this relies on incrementally building up the block's
|
||||||
|
* instruction list.. but this is how scheduling and nopsched
|
||||||
|
* work.
|
||||||
|
*/
|
||||||
|
foreach_instr_rev (n, &block->instr_list) {
|
||||||
|
if (d >= maxd)
|
||||||
|
return NULL;
|
||||||
|
if (count_instruction(n))
|
||||||
|
d++;
|
||||||
|
if (dest_regs(n) == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* note that a dest reg will never be an immediate */
|
||||||
|
if (n->regs[0]->array.id == array_id)
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* like list_length() but only counts instructions which count in the
|
||||||
|
* delay determination:
|
||||||
|
*/
|
||||||
|
static unsigned
|
||||||
|
count_block_delay(struct ir3_block *block)
|
||||||
|
{
|
||||||
|
unsigned delay = 0;
|
||||||
|
foreach_instr (n, &block->instr_list) {
|
||||||
|
if (!count_instruction(n))
|
||||||
|
continue;
|
||||||
|
delay++;
|
||||||
|
}
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
delay_calc_array(struct ir3_block *block, unsigned array_id,
|
||||||
|
struct ir3_instruction *consumer, unsigned srcn,
|
||||||
|
bool soft, bool pred, unsigned maxd)
|
||||||
|
{
|
||||||
|
struct ir3_instruction *assigner;
|
||||||
|
|
||||||
|
assigner = find_array_write(block, array_id, maxd);
|
||||||
|
if (assigner)
|
||||||
|
return delay_calc_srcn(block, assigner, consumer, srcn, soft, pred);
|
||||||
|
|
||||||
|
if (!pred)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
unsigned len = count_block_delay(block);
|
||||||
|
if (maxd <= len)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
maxd -= len;
|
||||||
|
|
||||||
|
if (block->data == block) {
|
||||||
|
/* we have a loop, return worst case: */
|
||||||
|
return maxd;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we need to search into predecessors, find the one with the
|
||||||
|
* max delay.. the resulting delay is that minus the number of
|
||||||
|
* counted instructions in this block:
|
||||||
|
*/
|
||||||
|
unsigned max = 0;
|
||||||
|
|
||||||
|
/* (ab)use block->data to prevent recursion: */
|
||||||
|
block->data = block;
|
||||||
|
|
||||||
|
set_foreach (block->predecessors, entry) {
|
||||||
|
struct ir3_block *pred = (struct ir3_block *)entry->key;
|
||||||
|
unsigned delay =
|
||||||
|
delay_calc_array(pred, array_id, consumer, srcn, soft, pred, maxd);
|
||||||
|
|
||||||
|
max = MAX2(max, delay);
|
||||||
|
}
|
||||||
|
|
||||||
|
block->data = NULL;
|
||||||
|
|
||||||
|
if (max < len)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return max - len;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate delay for instruction (maximum of delay for all srcs):
|
||||||
|
*
|
||||||
|
* @soft: If true, add additional delay for situations where they
|
||||||
|
* would not be strictly required because a sync flag would be
|
||||||
|
* used (but scheduler would prefer to schedule some other
|
||||||
|
* instructions first to avoid stalling on sync flag)
|
||||||
|
* @pred: If true, recurse into predecessor blocks
|
||||||
|
*/
|
||||||
|
unsigned
|
||||||
|
ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
|
||||||
|
bool soft, bool pred)
|
||||||
|
{
|
||||||
|
unsigned delay = 0;
|
||||||
|
struct ir3_register *src;
|
||||||
|
|
||||||
|
foreach_src_n (src, i, instr) {
|
||||||
|
unsigned d = 0;
|
||||||
|
|
||||||
|
if ((src->flags & IR3_REG_RELATIV) && !(src->flags & IR3_REG_CONST)) {
|
||||||
|
d = delay_calc_array(block, src->array.id, instr, i+1, soft, pred, 6);
|
||||||
|
} else if (src->instr) {
|
||||||
|
d = delay_calc_srcn(block, src->instr, instr, i+1, soft, pred);
|
||||||
|
}
|
||||||
|
|
||||||
|
delay = MAX2(delay, d);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instr->address) {
|
||||||
|
unsigned d = delay_calc_srcn(block, instr->address, instr, 0, soft, pred);
|
||||||
|
delay = MAX2(delay, d);
|
||||||
|
}
|
||||||
|
|
||||||
|
return delay;
|
||||||
|
}
|
|
@ -48,72 +48,6 @@
|
||||||
* blocks depth sorted list, which is used by the scheduling pass.
|
* blocks depth sorted list, which is used by the scheduling pass.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* generally don't count false dependencies, since this can just be
|
|
||||||
* something like a barrier, or SSBO store. The exception is array
|
|
||||||
* dependencies if the assigner is an array write and the consumer
|
|
||||||
* reads the same array.
|
|
||||||
*/
|
|
||||||
static bool
|
|
||||||
ignore_dep(struct ir3_instruction *assigner,
|
|
||||||
struct ir3_instruction *consumer, unsigned n)
|
|
||||||
{
|
|
||||||
if (!__is_false_dep(consumer, n))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) {
|
|
||||||
struct ir3_register *dst = assigner->regs[0];
|
|
||||||
struct ir3_register *src;
|
|
||||||
|
|
||||||
debug_assert(dst->flags & IR3_REG_ARRAY);
|
|
||||||
|
|
||||||
foreach_src(src, consumer) {
|
|
||||||
if ((src->flags & IR3_REG_ARRAY) &&
|
|
||||||
(dst->array.id == src->array.id)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* calculate required # of delay slots between the instruction that
|
|
||||||
* assigns a value and the one that consumes
|
|
||||||
*/
|
|
||||||
int ir3_delayslots(struct ir3_instruction *assigner,
|
|
||||||
struct ir3_instruction *consumer, unsigned n)
|
|
||||||
{
|
|
||||||
if (ignore_dep(assigner, consumer, n))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
|
|
||||||
* alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
|
|
||||||
* handled with sync bits
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (is_meta(assigner) || is_meta(consumer))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (writes_addr(assigner))
|
|
||||||
return 6;
|
|
||||||
|
|
||||||
/* handled via sync flags: */
|
|
||||||
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* assigner must be alu: */
|
|
||||||
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
|
|
||||||
is_mem(consumer)) {
|
|
||||||
return 6;
|
|
||||||
} else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
|
|
||||||
(n == 3)) {
|
|
||||||
/* special case, 3rd src to cat3 not required on first cycle */
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
|
ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
|
||||||
{
|
{
|
||||||
|
|
|
@ -265,117 +265,6 @@ deepest(struct ir3_instruction **srcs, unsigned nsrcs)
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @block: the block to search in, starting from end; in first pass,
|
|
||||||
* this will be the block the instruction would be inserted into
|
|
||||||
* (but has not yet, ie. it only contains already scheduled
|
|
||||||
* instructions). For intra-block scheduling (second pass), this
|
|
||||||
* would be one of the predecessor blocks.
|
|
||||||
* @instr: the instruction to search for
|
|
||||||
* @maxd: max distance, bail after searching this # of instruction
|
|
||||||
* slots, since it means the instruction we are looking for is
|
|
||||||
* far enough away
|
|
||||||
* @pred: if true, recursively search into predecessor blocks to
|
|
||||||
* find the worst case (shortest) distance (only possible after
|
|
||||||
* individual blocks are all scheduled
|
|
||||||
*/
|
|
||||||
static unsigned
|
|
||||||
distance(struct ir3_block *block, struct ir3_instruction *instr,
|
|
||||||
unsigned maxd, bool pred)
|
|
||||||
{
|
|
||||||
unsigned d = 0;
|
|
||||||
|
|
||||||
foreach_instr_rev (n, &block->instr_list) {
|
|
||||||
if ((n == instr) || (d >= maxd))
|
|
||||||
return d;
|
|
||||||
/* NOTE: don't count branch/jump since we don't know yet if they will
|
|
||||||
* be eliminated later in resolve_jumps().. really should do that
|
|
||||||
* earlier so we don't have this constraint.
|
|
||||||
*/
|
|
||||||
if (is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR)))
|
|
||||||
d++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if coming from a predecessor block, assume it is assigned far
|
|
||||||
* enough away.. we'll fix up later.
|
|
||||||
*/
|
|
||||||
if (!pred)
|
|
||||||
return maxd;
|
|
||||||
|
|
||||||
if (pred && (block->data != block)) {
|
|
||||||
/* Search into predecessor blocks, finding the one with the
|
|
||||||
* shortest distance, since that will be the worst case
|
|
||||||
*/
|
|
||||||
unsigned min = maxd - d;
|
|
||||||
|
|
||||||
/* (ab)use block->data to prevent recursion: */
|
|
||||||
block->data = block;
|
|
||||||
|
|
||||||
set_foreach(block->predecessors, entry) {
|
|
||||||
struct ir3_block *pred = (struct ir3_block *)entry->key;
|
|
||||||
unsigned n;
|
|
||||||
|
|
||||||
n = distance(pred, instr, min, pred);
|
|
||||||
|
|
||||||
min = MIN2(min, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
block->data = NULL;
|
|
||||||
d += min;
|
|
||||||
}
|
|
||||||
|
|
||||||
return d;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* calculate delay for specified src: */
|
|
||||||
static unsigned
|
|
||||||
delay_calc_srcn(struct ir3_block *block,
|
|
||||||
struct ir3_instruction *assigner,
|
|
||||||
struct ir3_instruction *consumer,
|
|
||||||
unsigned srcn, bool soft, bool pred)
|
|
||||||
{
|
|
||||||
unsigned delay = 0;
|
|
||||||
|
|
||||||
if (is_meta(assigner)) {
|
|
||||||
struct ir3_instruction *src;
|
|
||||||
foreach_ssa_src(src, assigner) {
|
|
||||||
unsigned d;
|
|
||||||
d = delay_calc_srcn(block, src, consumer, srcn, soft, pred);
|
|
||||||
delay = MAX2(delay, d);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (soft) {
|
|
||||||
if (is_sfu(assigner)) {
|
|
||||||
delay = 4;
|
|
||||||
} else {
|
|
||||||
delay = ir3_delayslots(assigner, consumer, srcn);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
delay = ir3_delayslots(assigner, consumer, srcn);
|
|
||||||
}
|
|
||||||
delay -= distance(block, assigner, delay, pred);
|
|
||||||
}
|
|
||||||
|
|
||||||
return delay;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* calculate delay for instruction (maximum of delay for all srcs): */
|
|
||||||
static unsigned
|
|
||||||
delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
|
|
||||||
bool soft, bool pred)
|
|
||||||
{
|
|
||||||
unsigned delay = 0;
|
|
||||||
struct ir3_instruction *src;
|
|
||||||
|
|
||||||
foreach_ssa_src_n(src, i, instr) {
|
|
||||||
unsigned d;
|
|
||||||
d = delay_calc_srcn(block, src, instr, i, soft, pred);
|
|
||||||
delay = MAX2(delay, d);
|
|
||||||
}
|
|
||||||
|
|
||||||
return delay;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ir3_sched_notes {
|
struct ir3_sched_notes {
|
||||||
/* there is at least one kill which could be scheduled, except
|
/* there is at least one kill which could be scheduled, except
|
||||||
* for unscheduled bary.f's:
|
* for unscheduled bary.f's:
|
||||||
|
@ -658,7 +547,7 @@ find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
int rank = delay_calc(ctx->block, candidate, soft, false);
|
int rank = ir3_delay_calc(ctx->block, candidate, soft, false);
|
||||||
|
|
||||||
/* if too many live values, prioritize instructions that reduce the
|
/* if too many live values, prioritize instructions that reduce the
|
||||||
* number of live values:
|
* number of live values:
|
||||||
|
@ -827,7 +716,7 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
||||||
instr = find_eligible_instr(ctx, &notes, false);
|
instr = find_eligible_instr(ctx, &notes, false);
|
||||||
|
|
||||||
if (instr) {
|
if (instr) {
|
||||||
unsigned delay = delay_calc(ctx->block, instr, false, false);
|
unsigned delay = ir3_delay_calc(ctx->block, instr, false, false);
|
||||||
|
|
||||||
d("delay=%u", delay);
|
d("delay=%u", delay);
|
||||||
|
|
||||||
|
@ -886,7 +775,7 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
||||||
debug_assert(ctx->pred);
|
debug_assert(ctx->pred);
|
||||||
debug_assert(block->condition);
|
debug_assert(block->condition);
|
||||||
|
|
||||||
delay -= distance(ctx->block, ctx->pred, delay, false);
|
delay -= ir3_distance(ctx->block, ctx->pred, delay, false);
|
||||||
|
|
||||||
while (delay > 0) {
|
while (delay > 0) {
|
||||||
ir3_NOP(block);
|
ir3_NOP(block);
|
||||||
|
@ -944,7 +833,7 @@ sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
||||||
|
|
||||||
set_foreach(block->predecessors, entry) {
|
set_foreach(block->predecessors, entry) {
|
||||||
struct ir3_block *pred = (struct ir3_block *)entry->key;
|
struct ir3_block *pred = (struct ir3_block *)entry->key;
|
||||||
unsigned d = delay_calc(pred, instr, false, true);
|
unsigned d = ir3_delay_calc(pred, instr, false, true);
|
||||||
delay = MAX2(d, delay);
|
delay = MAX2(d, delay);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,7 @@ libfreedreno_ir3_files = files(
|
||||||
'ir3_context.c',
|
'ir3_context.c',
|
||||||
'ir3_context.h',
|
'ir3_context.h',
|
||||||
'ir3_cp.c',
|
'ir3_cp.c',
|
||||||
|
'ir3_delay.c',
|
||||||
'ir3_depth.c',
|
'ir3_depth.c',
|
||||||
'ir3_group.c',
|
'ir3_group.c',
|
||||||
'ir3_image.c',
|
'ir3_image.c',
|
||||||
|
|
Loading…
Reference in New Issue