freedreno/ir3: add Sethi–Ullman numbering pass
Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
c8e351ee3a
commit
8a5f2d9444
|
@ -40,7 +40,8 @@ ir3_SOURCES := \
|
|||
ir3/ir3_ra.c \
|
||||
ir3/ir3_sched.c \
|
||||
ir3/ir3_shader.c \
|
||||
ir3/ir3_shader.h
|
||||
ir3/ir3_shader.h \
|
||||
ir3/ir3_sun.c
|
||||
|
||||
ir3_GENERATED_FILES := \
|
||||
ir3/ir3_nir_trig.c
|
||||
|
|
|
@ -29,8 +29,9 @@
|
|||
|
||||
#include "compiler/shader_enums.h"
|
||||
|
||||
#include "util/u_debug.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/list.h"
|
||||
#include "util/u_debug.h"
|
||||
|
||||
#include "instr-a3xx.h"
|
||||
|
||||
|
@ -292,6 +293,9 @@ struct ir3_instruction {
|
|||
*/
|
||||
void *data;
|
||||
|
||||
int sun; /* Sethi–Ullman number, used by sched */
|
||||
int use_count; /* currently just updated/used by cp */
|
||||
|
||||
/* Used during CP and RA stages. For fanin and shader inputs/
|
||||
* outputs where we need a sequence of consecutive registers,
|
||||
* keep track of each src instructions left (ie 'n-1') and right
|
||||
|
@ -363,8 +367,6 @@ struct ir3_instruction {
|
|||
/* Entry in ir3_block's instruction list: */
|
||||
struct list_head node;
|
||||
|
||||
int use_count; /* currently just updated/used by cp */
|
||||
|
||||
#ifdef DEBUG
|
||||
uint32_t serialno;
|
||||
#endif
|
||||
|
@ -443,6 +445,8 @@ struct ir3 {
|
|||
/* List of ir3_array's: */
|
||||
struct list_head array_list;
|
||||
|
||||
unsigned max_sun; /* max Sethi–Ullman number */
|
||||
|
||||
#ifdef DEBUG
|
||||
unsigned block_count, instr_count;
|
||||
#endif
|
||||
|
@ -739,6 +743,14 @@ static inline bool is_meta(struct ir3_instruction *instr)
|
|||
return (opc_cat(instr->opc) == -1);
|
||||
}
|
||||
|
||||
static inline unsigned dest_regs(struct ir3_instruction *instr)
|
||||
{
|
||||
if ((instr->regs_count == 0) || is_store(instr))
|
||||
return 0;
|
||||
|
||||
return util_last_bit(instr->regs[0]->wrmask);
|
||||
}
|
||||
|
||||
static inline bool writes_addr(struct ir3_instruction *instr)
|
||||
{
|
||||
if (instr->regs_count > 0) {
|
||||
|
@ -999,6 +1011,9 @@ void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
|
|||
/* group neighbors and insert mov's to resolve conflicts: */
|
||||
void ir3_group(struct ir3 *ir);
|
||||
|
||||
/* Sethi–Ullman numbering: */
|
||||
void ir3_sun(struct ir3 *ir);
|
||||
|
||||
/* scheduling: */
|
||||
void ir3_sched_add_deps(struct ir3 *ir);
|
||||
int ir3_sched(struct ir3 *ir);
|
||||
|
|
|
@ -2610,6 +2610,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
ir3_print(ir);
|
||||
}
|
||||
|
||||
/* do Sethi–Ullman numbering before scheduling: */
|
||||
ir3_sun(ir);
|
||||
|
||||
ret = ir3_sched(ir);
|
||||
if (ret) {
|
||||
DBG("SCHED failed!");
|
||||
|
@ -2708,6 +2711,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
else
|
||||
so->total_in = max_bary + 1;
|
||||
|
||||
so->max_sun = ir->max_sun;
|
||||
|
||||
out:
|
||||
if (ret) {
|
||||
if (so->ir)
|
||||
|
|
|
@ -40,7 +40,8 @@ static void print_instr_name(struct ir3_instruction *instr)
|
|||
#endif
|
||||
printf("%04u:", instr->name);
|
||||
printf("%04u:", instr->ip);
|
||||
printf("%03u: ", instr->depth);
|
||||
printf("%03u:", instr->depth);
|
||||
printf("%03u: ", instr->sun);
|
||||
|
||||
if (instr->flags & IR3_INSTR_SY)
|
||||
printf("(sy)");
|
||||
|
|
|
@ -393,6 +393,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
|
|||
|
||||
fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy);
|
||||
|
||||
fprintf(out, "; max_sun=%u\n", ir->max_sun);
|
||||
|
||||
/* print shader type specific info: */
|
||||
switch (so->type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
|
|
|
@ -344,6 +344,8 @@ struct ir3_shader_variant {
|
|||
*/
|
||||
unsigned branchstack;
|
||||
|
||||
unsigned max_sun;
|
||||
|
||||
/* the instructions length is in units of instruction groups
|
||||
* (4 instructions for a3xx, 16 instructions for a4xx.. each
|
||||
* instruction is 2 dwords):
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "ir3.h"
|
||||
|
||||
/*
|
||||
* A simple pass to do Sethi–Ullman numbering, as described in "Generalizations
|
||||
* of the Sethi-Ullman algorithm for register allocation"[1]. This is used by
|
||||
* the scheduler pass.
|
||||
*
|
||||
* TODO this could probably be more clever about flow control, ie. if a src
|
||||
* is computed in multiple paths into a block, I think we should only have to
|
||||
* consider the worst-case.
|
||||
*
|
||||
* [1] https://pdfs.semanticscholar.org/ae53/6010b214612c2571f483354c264b0b39c545.pdf
|
||||
*/
|
||||
|
||||
static unsigned
|
||||
number_instr(struct ir3_instruction *instr)
|
||||
{
|
||||
if (ir3_instr_check_mark(instr))
|
||||
return instr->sun;
|
||||
|
||||
struct ir3_instruction *src;
|
||||
const unsigned n = __ssa_src_cnt(instr);
|
||||
unsigned a[n];
|
||||
unsigned b[n];
|
||||
unsigned i = 0;
|
||||
|
||||
/* TODO I think including false-deps in the calculation is the right
|
||||
* thing to do:
|
||||
*/
|
||||
foreach_ssa_src_n(src, n, instr) {
|
||||
if (__is_false_dep(instr, n))
|
||||
continue;
|
||||
if (src->block != instr->block) {
|
||||
a[i] = 1;
|
||||
} else {
|
||||
a[i] = number_instr(src);
|
||||
}
|
||||
b[i] = dest_regs(src);
|
||||
i++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Rπ = max(aπ(1), bπ(1) + max(aπ(2), bπ(2) + max(..., bπ(k−1) + max(aπ(k), bπ(k)))...):
|
||||
*/
|
||||
unsigned last_r = 0;
|
||||
|
||||
for (int k = i - 1; k >= 0; k--) {
|
||||
unsigned r = MAX2(a[k], b[k] + last_r);
|
||||
|
||||
if (k > 0)
|
||||
r += b[k-1];
|
||||
|
||||
last_r = r;
|
||||
}
|
||||
|
||||
last_r = MAX2(last_r, dest_regs(instr));
|
||||
|
||||
instr->sun = last_r;
|
||||
|
||||
return instr->sun;
|
||||
}
|
||||
|
||||
void
|
||||
ir3_sun(struct ir3 *ir)
|
||||
{
|
||||
unsigned max = 0;
|
||||
|
||||
ir3_clear_mark(ir);
|
||||
|
||||
for (unsigned i = 0; i < ir->noutputs; i++)
|
||||
if (ir->outputs[i])
|
||||
max = MAX2(max, number_instr(ir->outputs[i]));
|
||||
|
||||
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
|
||||
for (unsigned i = 0; i < block->keeps_count; i++)
|
||||
max = MAX2(max, number_instr(block->keeps[i]));
|
||||
if (block->condition)
|
||||
max = MAX2(max, number_instr(block->condition));
|
||||
}
|
||||
|
||||
ir->max_sun = max;
|
||||
}
|
|
@ -56,6 +56,7 @@ libfreedreno_ir3_files = files(
|
|||
'ir3_sched.c',
|
||||
'ir3_shader.c',
|
||||
'ir3_shader.h',
|
||||
'ir3_sun.c',
|
||||
)
|
||||
|
||||
libfreedreno_ir3 = static_library(
|
||||
|
|
|
@ -52,7 +52,8 @@ dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug
|
|||
"SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n"
|
||||
"SHADER-DB: %s prog %d/%d: %u half, %u full\n"
|
||||
"SHADER-DB: %s prog %d/%d: %u const, %u constlen\n"
|
||||
"SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n",
|
||||
"SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n"
|
||||
"SHADER-DB: %s prog %d/%d: max_sun=%u\n",
|
||||
ir3_shader_stage(v->shader),
|
||||
v->shader->id, v->id,
|
||||
v->info.instrs_count,
|
||||
|
@ -67,7 +68,10 @@ dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug
|
|||
v->constlen,
|
||||
ir3_shader_stage(v->shader),
|
||||
v->shader->id, v->id,
|
||||
v->info.ss, v->info.sy);
|
||||
v->info.ss, v->info.sy,
|
||||
ir3_shader_stage(v->shader),
|
||||
v->shader->id, v->id,
|
||||
v->max_sun);
|
||||
}
|
||||
|
||||
struct ir3_shader_variant *
|
||||
|
|
Loading…
Reference in New Issue