diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 95f1a340ab3..c7254ea1473 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -20,6 +20,7 @@ C_SOURCES := \ vc4_packet.h \ vc4_program.c \ vc4_qir.c \ + vc4_qir_lower_uniforms.c \ vc4_qir.h \ vc4_qpu.c \ vc4_qpu_defines.h \ diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c index 07e1cb14b0d..f8c49a44bd3 100644 --- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c +++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c @@ -49,22 +49,11 @@ qir_opt_copy_propagation(struct vc4_compile *c) if (inst->dst.file == QFILE_TEMP) defs[inst->dst.index] = inst; - /* A single instruction can only read one uniform value. (It - * could maybe read the same uniform value in two operands, - * but that doesn't seem important to do). - */ - bool reads_a_uniform = false; - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { - if (inst->src[i].file == QFILE_UNIF) - reads_a_uniform = true; - } - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { int index = inst->src[i].index; if (inst->src[i].file == QFILE_TEMP && (movs[index].file == QFILE_TEMP || - (movs[index].file == QFILE_UNIF && - !reads_a_uniform))) { + movs[index].file == QFILE_UNIF)) { if (debug) { fprintf(stderr, "Copy propagate: "); qir_dump_inst(c, inst); @@ -72,8 +61,6 @@ qir_opt_copy_propagation(struct vc4_compile *c) } inst->src[i] = movs[index]; - if (movs[index].file == QFILE_UNIF) - reads_a_uniform = true; if (debug) { fprintf(stderr, "to: "); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index a1d9a7f064c..b904679ef00 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -142,25 +142,16 @@ qir_uniform(struct vc4_compile *c, return u; } -static struct qreg -get_temp_for_uniform(struct vc4_compile *c, enum 
quniform_contents contents, - uint32_t data) -{ - struct qreg u = qir_uniform(c, contents, data); - struct qreg t = qir_MOV(c, u); - return t; -} - static struct qreg qir_uniform_ui(struct vc4_compile *c, uint32_t ui) { - return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui); + return qir_uniform(c, QUNIFORM_CONSTANT, ui); } static struct qreg qir_uniform_f(struct vc4_compile *c, float f) { - return qir_uniform_ui(c, fui(f)); + return qir_uniform(c, QUNIFORM_CONSTANT, fui(f)); } static struct qreg @@ -232,8 +223,7 @@ get_src(struct vc4_compile *c, unsigned tgsi_op, if (src->Indirect) { r = indirect_uniform_load(c, full_src, s); } else { - r = get_temp_for_uniform(c, QUNIFORM_UNIFORM, - src->Index * 4 + s); + r = qir_uniform(c, QUNIFORM_UNIFORM, src->Index * 4 + s); } break; case TGSI_FILE_INPUT: @@ -660,13 +650,9 @@ tgsi_to_qir_tex(struct vc4_compile *c, if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT || tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) { s = qir_FMUL(c, s, - get_temp_for_uniform(c, - QUNIFORM_TEXRECT_SCALE_X, - unit)); + qir_uniform(c, QUNIFORM_TEXRECT_SCALE_X, unit)); t = qir_FMUL(c, t, - get_temp_for_uniform(c, - QUNIFORM_TEXRECT_SCALE_Y, - unit)); + qir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y, unit)); } if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE || @@ -689,7 +675,7 @@ tgsi_to_qir_tex(struct vc4_compile *c, c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP || c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER || c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) { - qir_TEX_R(c, get_temp_for_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit), + qir_TEX_R(c, qir_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit), texture_u[next_texture_u++]); } @@ -1504,14 +1490,11 @@ vc4_blend_channel(struct vc4_compile *c, } case PIPE_BLENDFACTOR_CONST_COLOR: return qir_FMUL(c, val, - get_temp_for_uniform(c, - QUNIFORM_BLEND_CONST_COLOR, - channel)); + qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, + channel)); case PIPE_BLENDFACTOR_CONST_ALPHA: return 
qir_FMUL(c, val, - get_temp_for_uniform(c, - QUNIFORM_BLEND_CONST_COLOR, - 3)); + qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3)); case PIPE_BLENDFACTOR_ZERO: return qir_uniform_f(c, 0.0); case PIPE_BLENDFACTOR_INV_SRC_COLOR: @@ -1529,15 +1512,15 @@ vc4_blend_channel(struct vc4_compile *c, case PIPE_BLENDFACTOR_INV_CONST_COLOR: return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0), - get_temp_for_uniform(c, - QUNIFORM_BLEND_CONST_COLOR, - channel))); + qir_uniform(c, + QUNIFORM_BLEND_CONST_COLOR, + channel))); case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0), - get_temp_for_uniform(c, - QUNIFORM_BLEND_CONST_COLOR, - 3))); + qir_uniform(c, + QUNIFORM_BLEND_CONST_COLOR, + 3))); default: case PIPE_BLENDFACTOR_SRC1_COLOR: @@ -1661,7 +1644,7 @@ static void alpha_test_discard(struct vc4_compile *c) { struct qreg src_alpha; - struct qreg alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0); + struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0); if (!c->fs_key->alpha_test) return; @@ -2171,6 +2154,7 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage, } qir_optimize(c); + qir_lower_uniforms(c); if (vc4_debug & VC4_DEBUG_QIR) { fprintf(stderr, "%s prog %d/%d QIR:\n", diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 9e0ee1f0ae5..5c1fdbddfb6 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -173,6 +173,12 @@ qir_is_multi_instruction(struct qinst *inst) return qir_op_info[inst->op].multi_instruction; } +bool +qir_is_tex(struct qinst *inst) +{ + return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT; +} + bool qir_depends_on_flags(struct qinst *inst) { @@ -420,9 +426,12 @@ qir_get_stage_name(enum qstage stage) void qir_SF(struct vc4_compile *c, struct qreg src) { - assert(!is_empty_list(&c->instructions)); - struct qinst *last_inst = (struct qinst *)c->instructions.prev; - if (last_inst->dst.file != src.file || + 
struct qinst *last_inst = NULL; + if (!is_empty_list(&c->instructions)) + last_inst = (struct qinst *)c->instructions.prev; + + if (!last_inst || + last_inst->dst.file != src.file || last_inst->dst.index != src.index || qir_is_multi_instruction(last_inst)) { src = qir_MOV(c, src); diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 6da6ff6542e..a1b55605584 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -376,6 +376,7 @@ bool qir_reg_equals(struct qreg a, struct qreg b); bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); bool qir_is_multi_instruction(struct qinst *inst); +bool qir_is_tex(struct qinst *inst); bool qir_depends_on_flags(struct qinst *inst); bool qir_writes_r4(struct qinst *inst); bool qir_reads_r4(struct qinst *inst); @@ -393,6 +394,7 @@ bool qir_opt_cse(struct vc4_compile *c); bool qir_opt_dead_code(struct vc4_compile *c); bool qir_opt_small_immediates(struct vc4_compile *c); bool qir_opt_vpm_writes(struct vc4_compile *c); +void qir_lower_uniforms(struct vc4_compile *c); void qpu_schedule_instructions(struct vc4_compile *c); diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c new file mode 100644 index 00000000000..d527889e76f --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c @@ -0,0 +1,176 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_qir_lower_uniforms.c
+ *
+ * This is the lowering pass for instructions that reference more than one
+ * uniform.  The most commonly used uniforms in such instructions get loaded
+ * into temps by MOVs at the head of the program, and the instructions are
+ * rewritten to read those temps instead.
+ *
+ * Hash keys are the uniform index plus 1, so that index 0 isn't a NULL key.
+ */
+
+#include "vc4_qir.h"
+#include "util/hash_table.h"
+#include "util/u_math.h"
+
+/* Hash callbacks: keys are small integers stored directly in the pointer. */
+static inline uint32_t
+index_hash(const void *key)
+{
+        return (uintptr_t)key;
+}
+
+static inline bool
+index_compare(const void *a, const void *b)
+{
+        return a == b;
+}
+
+/* Bumps the use count of reg's uniform, inserting it on its first use. */
+static void
+add_uniform(struct hash_table *ht, struct qreg reg)
+{
+        void *key = (void *)(uintptr_t)(reg.index + 1);
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+        if (entry)
+                entry->data = (void *)((uintptr_t)entry->data + 1);
+        else
+                _mesa_hash_table_insert(ht, key, (void *)(uintptr_t)1);
+}
+
+/* Drops one use of reg's uniform, removing it once no uses are left. */
+static void
+remove_uniform(struct hash_table *ht, struct qreg reg)
+{
+        void *key = (void *)(uintptr_t)(reg.index + 1);
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+        assert(entry);
+        entry->data = (void *)((uintptr_t)entry->data - 1);
+        if (entry->data == NULL)
+                _mesa_hash_table_remove(ht, entry);
+}
+
+/* True if src[i] is a uniform we may replace (never src[1] of a tex op). */
+static bool
+is_lowerable_uniform(struct qinst *inst, int i)
+{
+        if (inst->src[i].file != QFILE_UNIF)
+                return false;
+        if (qir_is_tex(inst))
+                return i != 1;
+        return true;
+}
+
+/**
+ * Replaces lowerable uniform sources of instructions that reference more than
+ * one uniform with temps loaded by MOVs at the head of the program.
+ */
+void
+qir_lower_uniforms(struct vc4_compile *c)
+{
+        struct simple_node *node;
+        struct hash_table *ht =
+                _mesa_hash_table_create(c, index_hash, index_compare);
+
+        /* Walk the instruction list and count, per uniform, its lowerable
+         * uses in instructions that reference more than one uniform.
+         */
+        foreach(node, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+                uint32_t nsrc = qir_get_op_nsrc(inst->op);
+                uint32_t count = 0;
+                for (int i = 0; i < nsrc; i++) {
+                        if (inst->src[i].file == QFILE_UNIF)
+                                count++;
+                }
+
+                if (count <= 1)
+                        continue;
+
+                for (int i = 0; i < nsrc; i++) {
+                        if (is_lowerable_uniform(inst, i))
+                                add_uniform(ht, inst->src[i]);
+                }
+        }
+
+        while (ht->entries) {
+                /* Find the most commonly used uniform in instructions that
+                 * need a uniform lowered (keys are the index biased by 1).
+                 */
+                uint32_t max_count = 0;
+                uint32_t max_index = 0;
+                struct hash_entry *entry;
+                hash_table_foreach(ht, entry) {
+                        uint32_t count = (uintptr_t)entry->data;
+                        uint32_t index = (uintptr_t)entry->key - 1;
+                        if (count > max_count) {
+                                max_count = count;
+                                max_index = index;
+                        }
+                }
+
+                /* Now, find the instructions using this uniform and make them
+                 * reference a temp instead.
+                 */
+                struct qreg temp = qir_get_temp(c);
+                struct qreg unif = { QFILE_UNIF, max_index };
+                struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef);
+                insert_at_head(&c->instructions, &mov->link);
+                foreach(node, &c->instructions) {
+                        struct qinst *inst = (struct qinst *)node;
+                        uint32_t nsrc = qir_get_op_nsrc(inst->op);
+                        uint32_t count = 0;
+                        for (int i = 0; i < nsrc; i++) {
+                                if (inst->src[i].file == QFILE_UNIF)
+                                        count++;
+                        }
+
+                        if (count <= 1)
+                                continue;
+
+                        for (int i = 0; i < nsrc; i++) {
+                                if (is_lowerable_uniform(inst, i) &&
+                                    inst->src[i].index == max_index) {
+                                        inst->src[i] = temp;
+                                        remove_uniform(ht, unif);
+                                        count--;
+                                }
+                        }
+
+                        /* If it's lowered enough now, drop its other uses. */
+                        if (count <= 1) {
+                                for (int i = 0; i < nsrc; i++) {
+                                        if (is_lowerable_uniform(inst, i))
+                                                remove_uniform(ht, inst->src[i]);
+                                }
+                        }
+                }
+        }
+
+        _mesa_hash_table_destroy(ht, NULL);
+}