vc4: Add a CSE optimization pass.
Debugging a regression in discard support was painful because the generated code was full of duplicate instructions, so I decided to eliminate them rather than re-analyze each one as I dumped its output in simulation.
This commit is contained in:
parent
80b27ca2cd
commit
55d2a16262
|
@ -7,6 +7,7 @@ C_SOURCES := \
|
|||
vc4_formats.c \
|
||||
vc4_opt_algebraic.c \
|
||||
vc4_opt_copy_propagation.c \
|
||||
vc4_opt_cse.c \
|
||||
vc4_opt_dead_code.c \
|
||||
vc4_program.c \
|
||||
vc4_qir.c \
|
||||
|
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file vc4_opt_cse.c
|
||||
*
|
||||
* Implements CSE for QIR without control flow.
|
||||
*
|
||||
* For each operation that writes a destination (and isn't just a MOV), put it
|
||||
* in the hash table of all instructions that do so. When faced with another
|
||||
* one, look it up in the hash table by its opcode and operands. If there's
|
||||
* an entry in the table, then just reuse the entry's destination as the
|
||||
* source of a MOV instead of reproducing the computation. That MOV will then
|
||||
* get cleaned up by copy propagation.
|
||||
*/
|
||||
|
||||
#include "vc4_qir.h"
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
/* When true, every CSE decision made in this file is logged to stderr. */
static bool debug;
|
||||
|
||||
struct inst_key {
|
||||
enum qop op;
|
||||
struct qreg src[4];
|
||||
/**
|
||||
* If the instruction depends on the flags, how many QOP_SFs have been
|
||||
* seen before this instruction, or if it depends on r4, how many r4
|
||||
* writes have been seen.
|
||||
*/
|
||||
uint32_t implicit_arg_update_count;
|
||||
};
|
||||
|
||||
static bool
|
||||
inst_key_equals(const void *a, const void *b)
|
||||
{
|
||||
const struct inst_key *key_a = a;
|
||||
const struct inst_key *key_b = b;
|
||||
|
||||
return memcmp(key_a, key_b, sizeof(*key_a)) == 0;
|
||||
}
|
||||
|
||||
static struct qinst *
|
||||
vc4_find_cse(struct hash_table *ht, struct qinst *inst, uint32_t sf_count,
|
||||
uint32_t r4_count)
|
||||
{
|
||||
if (inst->dst.file != QFILE_TEMP ||
|
||||
inst->op == QOP_MOV ||
|
||||
qir_get_op_nsrc(inst->op) > 4) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct inst_key key;
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.op = inst->op;
|
||||
memcpy(key.src, inst->src,
|
||||
qir_get_op_nsrc(inst->op) * sizeof(key.src[0]));
|
||||
if (qir_depends_on_flags(inst))
|
||||
key.implicit_arg_update_count = sf_count;
|
||||
if (qir_reads_r4(inst))
|
||||
key.implicit_arg_update_count = r4_count;
|
||||
|
||||
uint32_t hash = _mesa_hash_data(&key, sizeof(key));
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(ht, hash, &key);
|
||||
|
||||
if (entry) {
|
||||
if (debug) {
|
||||
fprintf(stderr, "CSE found match:\n");
|
||||
|
||||
fprintf(stderr, " Original inst: ");
|
||||
qir_dump_inst(entry->data);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
fprintf(stderr, " Our inst: ");
|
||||
qir_dump_inst(inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
return entry->data;
|
||||
}
|
||||
|
||||
struct inst_key *alloc_key = ralloc(ht, struct inst_key);
|
||||
if (!alloc_key)
|
||||
return NULL;
|
||||
memcpy(alloc_key, &key, sizeof(*alloc_key));
|
||||
_mesa_hash_table_insert(ht, hash, alloc_key, inst);
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "Added to CSE HT: ");
|
||||
qir_dump_inst(inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
qir_opt_cse(struct qcompile *c)
|
||||
{
|
||||
bool progress = false;
|
||||
struct simple_node *node, *t;
|
||||
struct qinst *last_sf = NULL;
|
||||
uint32_t sf_count = 0, r4_count = 0;
|
||||
|
||||
return false;
|
||||
struct hash_table *ht = _mesa_hash_table_create(NULL, inst_key_equals);
|
||||
if (!ht)
|
||||
return false;
|
||||
|
||||
foreach_s(node, t, &c->instructions) {
|
||||
struct qinst *inst = (struct qinst *)node;
|
||||
|
||||
if (qir_has_side_effects(inst)) {
|
||||
if (inst->op == QOP_TLB_DISCARD_SETUP)
|
||||
last_sf = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst->op == QOP_SF) {
|
||||
if (last_sf &&
|
||||
qir_reg_equals(last_sf->src[0], inst->src[0])) {
|
||||
if (debug) {
|
||||
fprintf(stderr,
|
||||
"Removing redundant SF: ");
|
||||
qir_dump_inst(inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
remove_from_list(&inst->link);
|
||||
progress = true;
|
||||
continue;
|
||||
} else {
|
||||
last_sf = inst;
|
||||
sf_count++;
|
||||
}
|
||||
} else {
|
||||
struct qinst *cse = vc4_find_cse(ht, inst,
|
||||
sf_count, r4_count);
|
||||
if (cse) {
|
||||
inst->src[0] = cse->dst;
|
||||
for (int i = 1; i < qir_get_op_nsrc(inst->op);
|
||||
i++)
|
||||
inst->src[i] = c->undef;
|
||||
inst->op = QOP_MOV;
|
||||
progress = true;
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, " Turned into: ");
|
||||
qir_dump_inst(inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (qir_reads_r4(inst))
|
||||
r4_count++;
|
||||
}
|
||||
|
||||
ralloc_free(ht);
|
||||
|
||||
return progress;
|
||||
}
|
|
@ -127,6 +127,54 @@ qir_has_side_effects(struct qinst *inst)
|
|||
return qir_op_info[inst->op].has_side_effects;
|
||||
}
|
||||
|
||||
bool
|
||||
qir_depends_on_flags(struct qinst *inst)
|
||||
{
|
||||
switch (inst->op) {
|
||||
case QOP_SEL_X_0_NS:
|
||||
case QOP_SEL_X_0_NC:
|
||||
case QOP_SEL_X_0_ZS:
|
||||
case QOP_SEL_X_0_ZC:
|
||||
case QOP_SEL_X_Y_NS:
|
||||
case QOP_SEL_X_Y_NC:
|
||||
case QOP_SEL_X_Y_ZS:
|
||||
case QOP_SEL_X_Y_ZC:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
qir_writes_r4(struct qinst *inst)
|
||||
{
|
||||
switch (inst->op) {
|
||||
case QOP_TEX_RESULT:
|
||||
case QOP_TLB_COLOR_READ:
|
||||
case QOP_RCP:
|
||||
case QOP_RSQ:
|
||||
case QOP_EXP2:
|
||||
case QOP_LOG2:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
qir_reads_r4(struct qinst *inst)
|
||||
{
|
||||
switch (inst->op) {
|
||||
case QOP_R4_UNPACK_A:
|
||||
case QOP_R4_UNPACK_B:
|
||||
case QOP_R4_UNPACK_C:
|
||||
case QOP_R4_UNPACK_D:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
qir_print_reg(struct qreg reg)
|
||||
{
|
||||
|
@ -274,6 +322,7 @@ qir_optimize(struct qcompile *c)
|
|||
bool progress = false;
|
||||
|
||||
OPTPASS(qir_opt_algebraic);
|
||||
OPTPASS(qir_opt_cse);
|
||||
OPTPASS(qir_opt_copy_propagation);
|
||||
OPTPASS(qir_opt_dead_code);
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util/u_simple_list.h"
|
||||
|
||||
|
@ -227,6 +228,9 @@ struct qreg qir_get_temp(struct qcompile *c);
|
|||
int qir_get_op_nsrc(enum qop qop);
|
||||
bool qir_reg_equals(struct qreg a, struct qreg b);
|
||||
bool qir_has_side_effects(struct qinst *inst);
|
||||
bool qir_depends_on_flags(struct qinst *inst);
|
||||
bool qir_writes_r4(struct qinst *inst);
|
||||
bool qir_reads_r4(struct qinst *inst);
|
||||
|
||||
void qir_dump(struct qcompile *c);
|
||||
void qir_dump_inst(struct qinst *inst);
|
||||
|
@ -235,6 +239,7 @@ const char *qir_get_stage_name(enum qstage stage);
|
|||
void qir_optimize(struct qcompile *c);
|
||||
bool qir_opt_algebraic(struct qcompile *c);
|
||||
bool qir_opt_copy_propagation(struct qcompile *c);
|
||||
bool qir_opt_cse(struct qcompile *c);
|
||||
bool qir_opt_dead_code(struct qcompile *c);
|
||||
|
||||
#define QIR_ALU0(name) \
|
||||
|
|
Loading…
Reference in New Issue