pan/bi: Add a bundling heuristic
Pick instructions locally that are likely to minimize nops globally. Note that the obvious greedy solution is ineffective. Instead, the principle is to choose instructions which will increase _future_ scheduler freedom, on the assumption that this means fewer nops will be needed overall. There is no concern about register pressure and little concern about message timing, since this is post-RA and message ordering was fixed. total tuples in shared programs: 125304 -> 123770 (-1.22%) tuples in affected programs: 88301 -> 86767 (-1.74%) helped: 548 HURT: 198 helped stats (abs) min: 1.0 max: 31.0 x̄: 3.43 x̃: 2 helped stats (rel) min: 0.33% max: 16.67% x̄: 3.09% x̃: 2.44% HURT stats (abs) min: 1.0 max: 12.0 x̄: 1.73 x̃: 1 HURT stats (rel) min: 0.27% max: 11.11% x̄: 2.82% x̃: 2.35% 95% mean confidence interval for tuples value: -2.39 -1.72 95% mean confidence interval for tuples %-change: -1.77% -1.27% Tuples are helped. total clauses in shared programs: 26059 -> 25655 (-1.55%) clauses in affected programs: 10821 -> 10417 (-3.73%) helped: 311 HURT: 42 helped stats (abs) min: 1.0 max: 6.0 x̄: 1.44 x̃: 1 helped stats (rel) min: 0.48% max: 16.00% x̄: 5.26% x̃: 5.00% HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.05 x̃: 1 HURT stats (rel) min: 2.13% max: 33.33% x̄: 7.49% x̃: 6.25% 95% mean confidence interval for clauses value: -1.26 -1.02 95% mean confidence interval for clauses %-change: -4.28% -3.21% Clauses are helped. total cycles in shared programs: 12154.79 -> 12114.83 (-0.33%) cycles in affected programs: 1907.63 -> 1867.67 (-2.09%) helped: 227 HURT: 99 helped stats (abs) min: 0.041665999999999315 max: 1.2083360000000027 x̄: 0.22 x̃: 0 helped stats (rel) min: 0.36% max: 20.00% x̄: 4.14% x̃: 3.85% HURT stats (abs) min: 0.041665999999999315 max: 0.5 x̄: 0.09 x̃: 0 HURT stats (rel) min: 0.28% max: 9.09% x̄: 2.75% x̃: 2.63% 95% mean confidence interval for cycles value: -0.15 -0.09 95% mean confidence interval for cycles %-change: -2.51% -1.59% Cycles are helped.
total arith in shared programs: 4658.13 -> 4603.42 (-1.17%) arith in affected programs: 3449.83 -> 3395.12 (-1.59%) helped: 509 HURT: 249 helped stats (abs) min: 0.041665999999999315 max: 1.2083360000000027 x̄: 0.14 x̃: 0 helped stats (rel) min: 0.36% max: 20.00% x̄: 3.56% x̃: 2.63% HURT stats (abs) min: 0.041665999999999315 max: 0.5 x̄: 0.07 x̃: 0 HURT stats (rel) min: 0.28% max: 25.00% x̄: 3.32% x̃: 2.27% 95% mean confidence interval for arith value: -0.09 -0.06 95% mean confidence interval for arith %-change: -1.64% -0.97% Arith are helped. total quadwords in shared programs: 111394 -> 110114 (-1.15%) quadwords in affected programs: 78074 -> 76794 (-1.64%) helped: 503 HURT: 204 helped stats (abs) min: 1.0 max: 43.0 x̄: 3.33 x̃: 2 helped stats (rel) min: 0.41% max: 13.79% x̄: 3.02% x̃: 2.44% HURT stats (abs) min: 1.0 max: 26.0 x̄: 1.94 x̃: 1 HURT stats (rel) min: 0.48% max: 7.55% x̄: 2.44% x̃: 2.22% 95% mean confidence interval for quadwords value: -2.16 -1.46 95% mean confidence interval for quadwords %-change: -1.67% -1.21% Quadwords are helped. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10961>
This commit is contained in:
parent
5996622050
commit
ab2cd532ba
|
@ -1020,10 +1020,27 @@ bi_instr_schedulable(bi_instr *instr,
|
|||
}
|
||||
|
||||
/*
 * Scheduling cost heuristic for a candidate instruction. The visible caller
 * (bi_choose_index) keeps the candidate with the lowest cost, so a negative
 * adjustment prioritizes an instruction and a positive one deprioritizes it.
 *
 * Starting from 0, the adjustments accumulate:
 *   +1 when both bi_can_fma(instr) and bi_can_add(instr) hold,
 *   -1 when bi_must_message(instr) holds,
 *   -2 when bi_must_last(instr) holds.
 * The returned value therefore lies between -3 and +1.
 *
 * NOTE(review): this listing is a rendered diff, so both the previous stub
 * (single-argument signature, "return 0") and the new body appear below.
 * The `tuple` parameter of the new signature is not read by the body shown
 * here -- presumably reserved for later heuristics; confirm against the tree.
 */
static signed
|
||||
bi_instr_cost(bi_instr *instr)
|
||||
bi_instr_cost(bi_instr *instr, struct bi_tuple_state *tuple)
|
||||
{
|
||||
/* TODO: stub */
|
||||
return 0;
|
||||
signed cost = 0;
|
||||
|
||||
/* Instructions that can schedule to either FMA or to ADD should be
|
||||
* deprioritized since they're easier to reschedule elsewhere */
|
||||
if (bi_can_fma(instr) && bi_can_add(instr))
|
||||
cost++;
|
||||
|
||||
/* Message-passing instructions impose constraints on the registers
|
||||
* later in the clause, so schedule them as late within a clause as
|
||||
* possible (<==> prioritize them since we're backwards <==> decrease
|
||||
* cost) */
|
||||
if (bi_must_message(instr))
|
||||
cost--;
|
||||
|
||||
/* Last instructions are big constraints (XXX: no effect on shader-db) */
|
||||
if (bi_must_last(instr))
|
||||
cost -= 2;
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
@ -1042,9 +1059,14 @@ bi_choose_index(struct bi_worklist st,
|
|||
if (!bi_instr_schedulable(instr, clause, tuple, live_after_temp, fma))
|
||||
continue;
|
||||
|
||||
signed cost = bi_instr_cost(instr);
|
||||
signed cost = bi_instr_cost(instr, tuple);
|
||||
|
||||
if (cost < best_cost) {
|
||||
/* Tie break in favour of later instructions, under the
|
||||
* assumption this promotes temporary usage (reducing pressure
|
||||
* on the register file). This is a side effect of a prepass
|
||||
* scheduling for pressure. */
|
||||
|
||||
if (cost <= best_cost) {
|
||||
best_idx = i;
|
||||
best_cost = cost;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue