broadcom: Add VC5 NIR compiler.

This is a pretty straightforward fork of VC4's NIR compiler to VC5.  The
condition codes, registers, and I/O have all changed, making the backend
hard to share, though their heritage is still recognizable.

v2: Move to src/broadcom/compiler to match intel's layout, rename more
    "vc5" to "v3d", rename QIR to VIR ("V3D IR") to avoid symbol conflicts
    with vc4, use new v3d_debug header, add compiler init/free functions,
    do texture swizzling in NIR to allow optimization.
Eric Anholt 2017-02-03 10:24:14 -08:00
parent f71364f297
commit ade416d023
17 changed files with 7498 additions and 0 deletions


@@ -26,6 +26,8 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/broadcom/ \
-I$(top_srcdir)/src/broadcom/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/include \
$(VALGRIND_CFLAGS) \
$(DEFINES)


@@ -16,6 +16,19 @@ BROADCOM_FILES = \
clif/clif_dump.c \
clif/clif_dump.h \
common/v3d_device_info.h \
compiler/nir_to_vir.c \
compiler/vir.c \
compiler/vir_dump.c \
compiler/vir_live_variables.c \
compiler/vir_lower_uniforms.c \
compiler/vir_opt_copy_propagate.c \
compiler/vir_opt_dead_code.c \
compiler/vir_register_allocate.c \
compiler/vir_to_qpu.c \
compiler/qpu_schedule.c \
compiler/qpu_validate.c \
compiler/v3d_compiler.h \
compiler/v3d_nir_lower_io.c \
qpu/qpu_disasm.c \
qpu/qpu_disasm.h \
qpu/qpu_instr.c \


@@ -13,6 +13,7 @@ check_PROGRAMS += \
LDADD = \
libbroadcom.la \
$(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/util/libmesautil.la \
$(NULL)

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,208 @@
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file
*
* Validates the QPU instruction sequence after register allocation and
* scheduling.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "v3d_compiler.h"
#include "qpu/qpu_disasm.h"
struct v3d_qpu_validate_state {
struct v3d_compile *c;
const struct v3d_qpu_instr *last;
int ip;
int last_sfu_write;
};
static void
fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
{
struct v3d_compile *c = state->c;
fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
int dump_ip = 0;
vir_for_each_inst_inorder(inst, c) {
v3d_qpu_dump(c->devinfo, &inst->qpu);
if (dump_ip++ == state->ip)
fprintf(stderr, " *** ERROR ***");
fprintf(stderr, "\n");
}
fprintf(stderr, "\n");
abort();
}
static bool
qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
bool (*predicate)(enum v3d_qpu_waddr waddr))
{
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
return false;
if (inst->alu.add.op != V3D_QPU_A_NOP &&
inst->alu.add.magic_write &&
predicate(inst->alu.add.waddr))
return true;
if (inst->alu.mul.op != V3D_QPU_M_NOP &&
inst->alu.mul.magic_write &&
predicate(inst->alu.mul.waddr))
return true;
return false;
}
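/* Intended use of the helper above (it is still unused in this pass,
 * hence the (void) cast further down), e.g.:
 *
 *     if (qpu_magic_waddr_matches(inst, v3d_qpu_magic_waddr_is_sfu))
 *             sfu_writes++;
 */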
static void
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
{
const struct v3d_qpu_instr *inst = &qinst->qpu;
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
return;
/* LDVARY writes r5 two instructions later and LDUNIF writes r5 one
* instruction later, so the pair would produce conflicting r5 writes
* in the same slot.
*/
if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
fail_instr(state, "LDUNIF after a LDVARY");
}
int tmu_writes = 0;
int sfu_writes = 0;
int vpm_writes = 0;
int tlb_writes = 0;
int tsy_writes = 0;
if (inst->alu.add.op != V3D_QPU_A_NOP) {
if (inst->alu.add.magic_write) {
if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr))
tmu_writes++;
if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
sfu_writes++;
if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
vpm_writes++;
if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
tlb_writes++;
if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
tsy_writes++;
}
}
if (inst->alu.mul.op != V3D_QPU_M_NOP) {
if (inst->alu.mul.magic_write) {
if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))
tmu_writes++;
if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
sfu_writes++;
if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
vpm_writes++;
if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
tlb_writes++;
if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
tsy_writes++;
}
}
(void)qpu_magic_waddr_matches; /* XXX */
/* SFU r4 results come back two instructions later.  Don't do r4
* reads/writes or other SFU lookups until it's done.
*/
if (state->ip - state->last_sfu_write < 2) {
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
fail_instr(state, "R4 read too soon after SFU");
if (v3d_qpu_writes_r4(inst))
fail_instr(state, "R4 write too soon after SFU");
if (sfu_writes)
fail_instr(state, "SFU write too soon after SFU");
}
/* XXX: The docs say VPM can happen with the others, but the simulator
* disagrees.
*/
if (tmu_writes +
sfu_writes +
vpm_writes +
tlb_writes +
tsy_writes +
inst->sig.ldtmu +
inst->sig.ldtlb +
inst->sig.ldvpm +
inst->sig.ldtlbu > 1) {
fail_instr(state,
"Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
}
if (sfu_writes)
state->last_sfu_write = state->ip;
}
static void
qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
{
vir_for_each_inst(qinst, block) {
qpu_validate_inst(state, qinst);
state->last = &qinst->qpu;
state->ip++;
}
}
/**
* Checks for the instruction restrictions from page 37 ("Summary of
* Instruction Restrictions").
*/
void
qpu_validate(struct v3d_compile *c)
{
/* We don't want to do validation in release builds, but we want to
* keep compiling the validation code to make sure it doesn't get
* broken.
*/
#ifndef DEBUG
return;
#endif
struct v3d_qpu_validate_state state = {
.c = c,
.last_sfu_write = -10,
.ip = 0,
};
vir_for_each_block(block, c) {
qpu_validate_block(&state, block);
}
}
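/* Illustrative sketch (not from this diff) of how the validator is
 * meant to be driven; the call presumably lives in vir_to_qpu.c, whose
 * diff is suppressed above.  The early return above makes it free in
 * release builds.
 */
static void
generate_and_check_qpu(struct v3d_compile *c)
{
        v3d_vir_to_qpu(c);      /* encode scheduled VIR into QPU words */
        qpu_validate(c);        /* debug-build-only checking */
}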


@@ -0,0 +1,43 @@
/*
* Copyright © 2016 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
struct v3d_compiler *
v3d_compiler_init(void)
{
struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
return compiler;
}
void
v3d_add_qpu_inst(struct v3d_compile *c, uint64_t inst)
{
if (c->qpu_inst_count >= c->qpu_inst_size) {
c->qpu_inst_size = MAX2(c->qpu_inst_size * 2, 16);
c->qpu_insts = reralloc(c, c->qpu_insts, uint64_t,
c->qpu_inst_size);
}
c->qpu_insts[c->qpu_inst_count++] = inst;
}
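/* Illustrative caller (not from this diff): the doubling above keeps
 * appends amortized O(1), with qpu_inst_size stepping 16, 32, 64, ...
 */
static void
append_program(struct v3d_compile *c, const uint64_t *words, int count)
{
        for (int i = 0; i < count; i++)
                v3d_add_qpu_inst(c, words[i]);
}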


@@ -0,0 +1,927 @@
/*
* Copyright © 2016 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef V3D_COMPILER_H
#define V3D_COMPILER_H
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include "util/macros.h"
#include "common/v3d_debug.h"
#include "compiler/nir/nir.h"
#include "util/list.h"
#include "util/u_math.h"
#include "qpu/qpu_instr.h"
#include "pipe/p_state.h"
#define V3D_MAX_TEXTURE_SAMPLERS 32
#define V3D_MAX_SAMPLES 4
#define V3D_MAX_FS_INPUTS 64
#define V3D_MAX_VS_INPUTS 64
struct nir_builder;
struct v3d_fs_inputs {
/**
* Array of the meanings of the VPM inputs this shader needs.
*
* It doesn't include those that aren't part of the VPM, like
* point/line coordinates.
*/
struct v3d_varying_slot *input_slots;
uint32_t num_inputs;
};
enum qfile {
/** An unused source or destination register. */
QFILE_NULL,
/** A physical register, such as the W coordinate payload. */
QFILE_REG,
/** One of the registers for fixed function interactions. */
QFILE_MAGIC,
/**
* A virtual register, that will be allocated to actual accumulator
* or physical registers later.
*/
QFILE_TEMP,
QFILE_VARY,
QFILE_UNIF,
QFILE_TLB,
QFILE_TLBU,
/**
* VPM reads use this with an index value to say what part of the VPM
* is being read.
*/
QFILE_VPM,
/**
* Stores an immediate value in the index field that will be used
* directly by qpu_load_imm().
*/
QFILE_LOAD_IMM,
/**
* Stores an immediate value in the index field that can be turned
* into a small immediate field by qpu_encode_small_immediate().
*/
QFILE_SMALL_IMM,
};
/**
* A reference to a QPU register or a virtual temp register.
*/
struct qreg {
enum qfile file;
uint32_t index;
};
static inline struct qreg vir_reg(enum qfile file, uint32_t index)
{
return (struct qreg){file, index};
}
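/* Construction sketches (values are illustrative):
 *
 *     vir_reg(QFILE_TEMP, 12)                    -- virtual temp t12
 *     vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_RECIP)  -- SFU reciprocal write
 *     vir_reg(QFILE_NULL, 0)                     -- unused operand slot
 */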
/**
* A reference to an actual register at the QPU level, for register
* allocation.
*/
struct qpu_reg {
bool magic;
int index;
};
struct qinst {
/** Entry in qblock->instructions */
struct list_head link;
/**
* The instruction being wrapped. Its condition codes, pack flags,
* signals, etc. will all be used, with just the register references
* being replaced by the contents of qinst->dst and qinst->src[].
*/
struct v3d_qpu_instr qpu;
/* Pre-register-allocation references to src/dst registers */
struct qreg dst;
struct qreg src[3];
bool cond_is_exec_mask;
bool has_implicit_uniform;
/* After vir_to_qpu.c: If instr reads a uniform, which uniform from
* the uncompiled stream it is.
*/
int uniform;
};
enum quniform_contents {
/**
* Indicates that a constant 32-bit value is copied from the program's
* uniform contents.
*/
QUNIFORM_CONSTANT,
/**
* Indicates that the program's uniform contents are used as an index
* into the GL uniform storage.
*/
QUNIFORM_UNIFORM,
/** @{
* Scaling factors from clip coordinates to coordinates relative to the
* viewport center.
*
* This is used by the coordinate and vertex shaders to produce the
* 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
* point offsets from the viewport center.
*/
QUNIFORM_VIEWPORT_X_SCALE,
QUNIFORM_VIEWPORT_Y_SCALE,
/** @} */
QUNIFORM_VIEWPORT_Z_OFFSET,
QUNIFORM_VIEWPORT_Z_SCALE,
QUNIFORM_USER_CLIP_PLANE,
/**
* A reference to a texture config parameter 0 uniform.
*
* This is a uniform implicitly loaded with a QPU_W_TMU* write, which
* defines texture type, miplevels, and such. It will be found as a
* parameter to the first QOP_TEX_[STRB] instruction in a sequence.
*/
QUNIFORM_TEXTURE_CONFIG_P0_0,
QUNIFORM_TEXTURE_CONFIG_P0_1,
QUNIFORM_TEXTURE_CONFIG_P0_2,
QUNIFORM_TEXTURE_CONFIG_P0_3,
QUNIFORM_TEXTURE_CONFIG_P0_4,
QUNIFORM_TEXTURE_CONFIG_P0_5,
QUNIFORM_TEXTURE_CONFIG_P0_6,
QUNIFORM_TEXTURE_CONFIG_P0_7,
QUNIFORM_TEXTURE_CONFIG_P0_8,
QUNIFORM_TEXTURE_CONFIG_P0_9,
QUNIFORM_TEXTURE_CONFIG_P0_10,
QUNIFORM_TEXTURE_CONFIG_P0_11,
QUNIFORM_TEXTURE_CONFIG_P0_12,
QUNIFORM_TEXTURE_CONFIG_P0_13,
QUNIFORM_TEXTURE_CONFIG_P0_14,
QUNIFORM_TEXTURE_CONFIG_P0_15,
QUNIFORM_TEXTURE_CONFIG_P0_16,
QUNIFORM_TEXTURE_CONFIG_P0_17,
QUNIFORM_TEXTURE_CONFIG_P0_18,
QUNIFORM_TEXTURE_CONFIG_P0_19,
QUNIFORM_TEXTURE_CONFIG_P0_20,
QUNIFORM_TEXTURE_CONFIG_P0_21,
QUNIFORM_TEXTURE_CONFIG_P0_22,
QUNIFORM_TEXTURE_CONFIG_P0_23,
QUNIFORM_TEXTURE_CONFIG_P0_24,
QUNIFORM_TEXTURE_CONFIG_P0_25,
QUNIFORM_TEXTURE_CONFIG_P0_26,
QUNIFORM_TEXTURE_CONFIG_P0_27,
QUNIFORM_TEXTURE_CONFIG_P0_28,
QUNIFORM_TEXTURE_CONFIG_P0_29,
QUNIFORM_TEXTURE_CONFIG_P0_30,
QUNIFORM_TEXTURE_CONFIG_P0_31,
QUNIFORM_TEXTURE_CONFIG_P0_32,
/**
* A reference to a texture config parameter 1 uniform.
*
* This is a uniform implicitly loaded with a QPU_W_TMU* write, which
* defines texture width, height, filters, and wrap modes. It will be
* found as a parameter to the second QOP_TEX_[STRB] instruction in a
* sequence.
*/
QUNIFORM_TEXTURE_CONFIG_P1,
QUNIFORM_TEXTURE_FIRST_LEVEL,
QUNIFORM_TEXTURE_WIDTH,
QUNIFORM_TEXTURE_HEIGHT,
QUNIFORM_TEXTURE_DEPTH,
QUNIFORM_TEXTURE_ARRAY_SIZE,
QUNIFORM_TEXTURE_LEVELS,
QUNIFORM_TEXTURE_MSAA_ADDR,
QUNIFORM_UBO_ADDR,
QUNIFORM_TEXRECT_SCALE_X,
QUNIFORM_TEXRECT_SCALE_Y,
QUNIFORM_TEXTURE_BORDER_COLOR,
QUNIFORM_STENCIL,
QUNIFORM_ALPHA_REF,
QUNIFORM_SAMPLE_MASK,
};
struct v3d_varying_slot {
uint8_t slot_and_component;
};
static inline struct v3d_varying_slot
v3d_slot_from_slot_and_component(uint8_t slot, uint8_t component)
{
assert(slot < 255 / 4);
return (struct v3d_varying_slot){ (slot << 2) + component };
}
static inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot)
{
return slot.slot_and_component >> 2;
}
static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot)
{
return slot.slot_and_component & 3;
}
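/* Round-trip sketch of the packed encoding above (slot in bits 7:2,
 * component in bits 1:0; VARYING_SLOT_COL0 is just an example input):
 *
 *     struct v3d_varying_slot s =
 *             v3d_slot_from_slot_and_component(VARYING_SLOT_COL0, 2);
 *     assert(v3d_slot_get_slot(s) == VARYING_SLOT_COL0);
 *     assert(v3d_slot_get_component(s) == 2);
 */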
struct v3d_ubo_range {
/**
* Offset in bytes from the start of the UBO where this range is
* uploaded.
*
* Only assigned once the range is marked used (see ubo_range_used).
*/
uint32_t dst_offset;
/**
* offset in bytes from the start of the gallium uniforms where the
* data comes from.
*/
uint32_t src_offset;
/** size in bytes of this ubo range */
uint32_t size;
};
struct v3d_key {
void *shader_state;
struct {
uint8_t swizzle[4];
uint8_t return_size;
uint8_t return_channels;
union {
struct {
unsigned compare_mode:1;
unsigned compare_func:3;
unsigned wrap_s:3;
unsigned wrap_t:3;
};
struct {
uint16_t msaa_width, msaa_height;
};
};
} tex[V3D_MAX_TEXTURE_SAMPLERS];
uint8_t ucp_enables;
};
struct v3d_fs_key {
struct v3d_key base;
bool depth_enabled;
bool is_points;
bool is_lines;
bool alpha_test;
bool point_coord_upper_left;
bool light_twoside;
bool msaa;
bool sample_coverage;
bool sample_alpha_to_coverage;
bool sample_alpha_to_one;
bool clamp_color;
bool swap_color_rb;
uint8_t alpha_test_func;
uint8_t logicop_func;
uint32_t point_sprite_mask;
struct pipe_rt_blend_state blend;
};
struct v3d_vs_key {
struct v3d_key base;
struct v3d_varying_slot fs_inputs[V3D_MAX_FS_INPUTS];
uint8_t num_fs_inputs;
bool is_coord;
bool per_vertex_point_size;
bool clamp_color;
};
/** A basic block of VIR instructions. */
struct qblock {
struct list_head link;
struct list_head instructions;
struct set *predecessors;
struct qblock *successors[2];
int index;
/* Instruction IPs for the first and last instruction of the block.
* Set by qpu_schedule.c.
*/
uint32_t start_qpu_ip;
uint32_t end_qpu_ip;
/* Instruction IP for the branch instruction of the block. Set by
* qpu_schedule.c.
*/
uint32_t branch_qpu_ip;
/** Offset within the uniform stream at the start of the block. */
uint32_t start_uniform;
/** Offset within the uniform stream of the branch instruction */
uint32_t branch_uniform;
/** @{ used by v3d_vir_live_variables.c */
BITSET_WORD *def;
BITSET_WORD *use;
BITSET_WORD *live_in;
BITSET_WORD *live_out;
int start_ip, end_ip;
/** @} */
};
/**
* Compiler state saved across compiler invocations, for any expensive global
* setup.
*/
struct v3d_compiler {
const struct v3d_device_info *devinfo;
struct ra_regs *regs;
unsigned int reg_class[3];
};
struct v3d_compile {
const struct v3d_device_info *devinfo;
nir_shader *s;
nir_function_impl *impl;
struct exec_list *cf_node_list;
const struct v3d_compiler *compiler;
/**
* Mapping from nir_register * or nir_ssa_def * to array of struct
* qreg for the values.
*/
struct hash_table *def_ht;
/* For each temp, the instruction generating its value. */
struct qinst **defs;
uint32_t defs_array_size;
/**
* Inputs to the shader, arranged by TGSI declaration order.
*
* Not all fragment shader QFILE_VARY reads are present in this array.
*/
struct qreg *inputs;
struct qreg *outputs;
bool msaa_per_sample_output;
struct qreg color_reads[V3D_MAX_SAMPLES];
struct qreg sample_colors[V3D_MAX_SAMPLES];
uint32_t inputs_array_size;
uint32_t outputs_array_size;
uint32_t uniforms_array_size;
/* Booleans for whether the corresponding QFILE_VARY[i] is
* flat-shaded. This doesn't count gl_FragColor flat-shading, which is
* controlled by shader->color_inputs and rasterizer->flatshade in the
* gallium driver.
*/
BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
struct v3d_ubo_range *ubo_ranges;
bool *ubo_range_used;
uint32_t ubo_ranges_array_size;
/** Number of uniform areas tracked in ubo_ranges. */
uint32_t num_ubo_ranges;
uint32_t next_ubo_dst_offset;
/* State for whether we're executing on each channel currently. 0 if
* yes, otherwise a block number + 1 that the channel jumped to.
*/
struct qreg execute;
struct qreg line_x, point_x, point_y;
/**
* Instance ID, which comes in before the vertex attribute payload if
* the shader record requests it.
*/
struct qreg iid;
/**
* Vertex ID, which comes in before the vertex attribute payload
* (after Instance ID) if the shader record requests it.
*/
struct qreg vid;
/* Fragment shader payload regs. */
struct qreg payload_w, payload_w_centroid, payload_z;
/** boolean (~0 -> true) if the fragment has been discarded. */
struct qreg discard;
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
uint32_t num_vpm_writes;
/**
* Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
*
* This includes those that aren't part of the VPM varyings, like
* point/line coordinates.
*/
struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
/**
* An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
* of the output is. Used to emit from the VS in the order that the
* FS needs.
*/
struct v3d_varying_slot *output_slots;
struct pipe_shader_state *shader_state;
struct v3d_key *key;
struct v3d_fs_key *fs_key;
struct v3d_vs_key *vs_key;
/* Live ranges of temps. */
int *temp_start, *temp_end;
uint32_t *uniform_data;
enum quniform_contents *uniform_contents;
uint32_t uniform_array_size;
uint32_t num_uniforms;
uint32_t num_outputs;
uint32_t output_position_index;
nir_variable *output_color_var;
uint32_t output_point_size_index;
uint32_t output_sample_mask_index;
struct qreg undef;
uint32_t num_temps;
struct list_head blocks;
int next_block_index;
struct qblock *cur_block;
struct qblock *loop_cont_block;
struct qblock *loop_break_block;
uint64_t *qpu_insts;
uint32_t qpu_inst_count;
uint32_t qpu_inst_size;
/* For the FS, the number of varying inputs not counting the
* point/line varyings payload
*/
uint32_t num_inputs;
/**
* Number of inputs from num_inputs remaining to be queued to the read
* FIFO in the VS/CS.
*/
uint32_t num_inputs_remaining;
/* Number of inputs currently in the read FIFO for the VS/CS */
uint32_t num_inputs_in_fifo;
/** Next offset in the VPM to read from in the VS/CS */
uint32_t vpm_read_offset;
uint32_t program_id;
uint32_t variant_id;
/* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH
* is used to hide texturing latency at the cost of limiting ourselves
* to the bottom half of physical reg space.
*/
bool fs_threaded;
bool last_thrsw_at_top_level;
bool failed;
};
struct v3d_uniform_list {
enum quniform_contents *contents;
uint32_t *data;
uint32_t count;
};
struct v3d_prog_data {
struct v3d_uniform_list uniforms;
struct v3d_ubo_range *ubo_ranges;
uint32_t num_ubo_ranges;
uint32_t ubo_size;
uint8_t num_inputs;
};
struct v3d_vs_prog_data {
struct v3d_prog_data base;
bool uses_iid, uses_vid;
/* Number of components read from each vertex attribute. */
uint8_t vattr_sizes[32];
/* Total number of components read, for the shader state record. */
uint32_t vpm_input_size;
/* Total number of components written, for the shader state record. */
uint32_t vpm_output_size;
};
struct v3d_fs_prog_data {
struct v3d_prog_data base;
struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
/** bitmask of which inputs are color inputs, for flat shade handling. */
uint32_t color_inputs[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
/* Bitmask for whether the corresponding input is flat-shaded,
* independent of rasterizer (gl_FragColor) flat-shading.
*/
BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
bool writes_z;
};
/* Special nir_load_input intrinsic index for loading the current TLB
* destination color.
*/
#define V3D_NIR_TLB_COLOR_READ_INPUT 2000000000
#define V3D_NIR_MS_MASK_OUTPUT 2000000000
extern const nir_shader_compiler_options v3d_nir_options;
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
void v3d_compiler_free(const struct v3d_compiler *compiler);
void v3d_optimize_nir(struct nir_shader *s);
uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
struct v3d_vs_key *key,
struct v3d_vs_prog_data *prog_data,
nir_shader *s,
int program_id, int variant_id,
uint32_t *final_assembly_size);
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
struct v3d_fs_key *key,
struct v3d_fs_prog_data *prog_data,
nir_shader *s,
int program_id, int variant_id,
uint32_t *final_assembly_size);
void v3d_nir_to_vir(struct v3d_compile *c);
void vir_compile_destroy(struct v3d_compile *c);
const char *vir_get_stage_name(struct v3d_compile *c);
struct qblock *vir_new_block(struct v3d_compile *c);
void vir_set_emit_block(struct v3d_compile *c, struct qblock *block);
void vir_link_blocks(struct qblock *predecessor, struct qblock *successor);
struct qblock *vir_entry_block(struct v3d_compile *c);
struct qblock *vir_exit_block(struct v3d_compile *c);
struct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst,
struct qreg src0, struct qreg src1);
struct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst,
struct qreg src0, struct qreg src1);
struct qinst *vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src0);
void vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst);
struct qreg vir_uniform(struct v3d_compile *c,
enum quniform_contents contents,
uint32_t data);
void vir_schedule_instructions(struct v3d_compile *c);
struct v3d_qpu_instr v3d_qpu_nop(void);
struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst);
struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf);
void vir_set_unpack(struct qinst *inst, int src,
enum v3d_qpu_input_unpack unpack);
struct qreg vir_get_temp(struct v3d_compile *c);
void vir_calculate_live_intervals(struct v3d_compile *c);
bool vir_has_implicit_uniform(struct qinst *inst);
int vir_get_implicit_uniform_src(struct qinst *inst);
int vir_get_non_sideband_nsrc(struct qinst *inst);
int vir_get_nsrc(struct qinst *inst);
bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst);
bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op);
bool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op);
bool vir_is_raw_mov(struct qinst *inst);
bool vir_is_tex(struct qinst *inst);
bool vir_is_add(struct qinst *inst);
bool vir_is_mul(struct qinst *inst);
bool vir_is_float_input(struct qinst *inst);
bool vir_depends_on_flags(struct qinst *inst);
bool vir_writes_r3(struct qinst *inst);
bool vir_writes_r4(struct qinst *inst);
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
uint8_t vir_channels_written(struct qinst *inst);
void vir_dump(struct v3d_compile *c);
void vir_dump_inst(struct v3d_compile *c, struct qinst *inst);
void vir_validate(struct v3d_compile *c);
void vir_optimize(struct v3d_compile *c);
bool vir_opt_algebraic(struct v3d_compile *c);
bool vir_opt_constant_folding(struct v3d_compile *c);
bool vir_opt_copy_propagate(struct v3d_compile *c);
bool vir_opt_dead_code(struct v3d_compile *c);
bool vir_opt_peephole_sf(struct v3d_compile *c);
bool vir_opt_small_immediates(struct v3d_compile *c);
bool vir_opt_vpm(struct v3d_compile *c);
void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
void vir_lower_uniforms(struct v3d_compile *c);
void v3d_vir_to_qpu(struct v3d_compile *c);
uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
void qpu_validate(struct v3d_compile *c);
struct qpu_reg *v3d_register_allocate(struct v3d_compile *c);
bool vir_init_reg_sets(struct v3d_compiler *compiler);
void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf);
static inline bool
quniform_contents_is_texture_p0(enum quniform_contents contents)
{
return (contents >= QUNIFORM_TEXTURE_CONFIG_P0_0 &&
contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 +
V3D_MAX_TEXTURE_SAMPLERS));
}
static inline struct qreg
vir_uniform_ui(struct v3d_compile *c, uint32_t ui)
{
return vir_uniform(c, QUNIFORM_CONSTANT, ui);
}
static inline struct qreg
vir_uniform_f(struct v3d_compile *c, float f)
{
return vir_uniform(c, QUNIFORM_CONSTANT, fui(f));
}
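/* Usage sketch: vir_uniform() deduplicates by (contents, data), so
 * repeated requests share one uniform stream slot:
 *
 *     struct qreg half = vir_uniform_f(c, 0.5f);
 *     struct qreg zero = vir_uniform_ui(c, 0);
 */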
#define VIR_ALU0(name, vir_inst, op) \
static inline struct qreg \
vir_##name(struct v3d_compile *c) \
{ \
return vir_emit_def(c, vir_inst(op, c->undef, \
c->undef, c->undef)); \
} \
static inline struct qinst * \
vir_##name##_dest(struct v3d_compile *c, struct qreg dest) \
{ \
return vir_emit_nondef(c, vir_inst(op, dest, \
c->undef, c->undef)); \
}
#define VIR_ALU1(name, vir_inst, op) \
static inline struct qreg \
vir_##name(struct v3d_compile *c, struct qreg a) \
{ \
return vir_emit_def(c, vir_inst(op, c->undef, \
a, c->undef)); \
} \
static inline struct qinst * \
vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
struct qreg a) \
{ \
return vir_emit_nondef(c, vir_inst(op, dest, a, \
c->undef)); \
}
#define VIR_ALU2(name, vir_inst, op) \
static inline struct qreg \
vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \
{ \
return vir_emit_def(c, vir_inst(op, c->undef, a, b)); \
} \
static inline struct qinst * \
vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
struct qreg a, struct qreg b) \
{ \
return vir_emit_nondef(c, vir_inst(op, dest, a, b)); \
}
#define VIR_NODST_1(name, vir_inst, op) \
static inline struct qinst * \
vir_##name(struct v3d_compile *c, struct qreg a) \
{ \
return vir_emit_nondef(c, vir_inst(op, c->undef, \
a, c->undef)); \
}
#define VIR_NODST_2(name, vir_inst, op) \
static inline struct qinst * \
vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \
{ \
return vir_emit_nondef(c, vir_inst(op, c->undef, \
a, b)); \
}
#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name)
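/* For reference, VIR_A_ALU2(FADD) below expands to roughly:
 *
 *     static inline struct qreg
 *     vir_FADD(struct v3d_compile *c, struct qreg a, struct qreg b)
 *     {
 *             return vir_emit_def(c, vir_add_inst(V3D_QPU_A_FADD,
 *                                                 c->undef, a, b));
 *     }
 *
 * plus a vir_FADD_dest() variant that takes an explicit destination
 * and returns the qinst instead.
 */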
VIR_A_ALU2(FADD)
VIR_A_ALU2(VFPACK)
VIR_A_ALU2(FSUB)
VIR_A_ALU2(FMIN)
VIR_A_ALU2(FMAX)
VIR_A_ALU2(ADD)
VIR_A_ALU2(SUB)
VIR_A_ALU2(SHL)
VIR_A_ALU2(SHR)
VIR_A_ALU2(ASR)
VIR_A_ALU2(ROR)
VIR_A_ALU2(MIN)
VIR_A_ALU2(MAX)
VIR_A_ALU2(UMIN)
VIR_A_ALU2(UMAX)
VIR_A_ALU2(AND)
VIR_A_ALU2(OR)
VIR_A_ALU2(XOR)
VIR_A_ALU2(VADD)
VIR_A_ALU2(VSUB)
VIR_A_ALU1(NOT)
VIR_A_ALU1(NEG)
VIR_A_ALU1(FLAPUSH)
VIR_A_ALU1(FLBPUSH)
VIR_A_ALU1(FLBPOP)
VIR_A_ALU1(SETMSF)
VIR_A_ALU1(SETREVF)
VIR_A_ALU1(TIDX)
VIR_A_ALU1(EIDX)
VIR_A_ALU0(FXCD)
VIR_A_ALU0(XCD)
VIR_A_ALU0(FYCD)
VIR_A_ALU0(YCD)
VIR_A_ALU0(MSF)
VIR_A_ALU0(REVF)
VIR_A_NODST_1(VPMSETUP)
VIR_A_ALU2(FCMP)
VIR_A_ALU2(VFMAX)
VIR_A_ALU1(FROUND)
VIR_A_ALU1(FTOIN)
VIR_A_ALU1(FTRUNC)
VIR_A_ALU1(FTOIZ)
VIR_A_ALU1(FFLOOR)
VIR_A_ALU1(FTOUZ)
VIR_A_ALU1(FCEIL)
VIR_A_ALU1(FTOC)
VIR_A_ALU1(FDX)
VIR_A_ALU1(FDY)
VIR_A_ALU1(ITOF)
VIR_A_ALU1(CLZ)
VIR_A_ALU1(UTOF)
VIR_M_ALU2(UMUL24)
VIR_M_ALU2(FMUL)
VIR_M_ALU2(SMUL24)
VIR_M_NODST_2(MULTOP)
VIR_M_ALU1(MOV)
VIR_M_ALU1(FMOV)
static inline struct qinst *
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
struct qreg dest, struct qreg src)
{
struct qinst *mov = vir_MOV_dest(c, dest, src);
vir_set_cond(mov, cond);
return mov;
}
static inline struct qreg
vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond,
struct qreg src0, struct qreg src1)
{
struct qreg t = vir_get_temp(c);
vir_MOV_dest(c, t, src1);
vir_MOV_cond(c, cond, t, src0);
return t;
}
static inline void
vir_VPM_WRITE(struct v3d_compile *c, struct qreg val)
{
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
}
static inline struct qinst *
vir_NOP(struct v3d_compile *c)
{
return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_NOP,
c->undef, c->undef, c->undef));
}
/*
static inline struct qreg
vir_LOAD_IMM(struct v3d_compile *c, uint32_t val)
{
return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef,
vir_reg(QFILE_LOAD_IMM, val), c->undef));
}
static inline struct qreg
vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val)
{
return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_U2, c->undef,
vir_reg(QFILE_LOAD_IMM, val),
c->undef));
}
static inline struct qreg
vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val)
{
return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef,
vir_reg(QFILE_LOAD_IMM, val),
c->undef));
}
*/
static inline struct qinst *
vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_cond cond)
{
/* The actual uniform_data value will be set at scheduling time */
return vir_emit_nondef(c, vir_branch_inst(cond, vir_uniform_ui(c, 0)));
}
#define vir_for_each_block(block, c) \
list_for_each_entry(struct qblock, block, &c->blocks, link)
#define vir_for_each_block_rev(block, c) \
list_for_each_entry_rev(struct qblock, block, &c->blocks, link)
/* Loop over the non-NULL members of the successors array. */
#define vir_for_each_successor(succ, block) \
for (struct qblock *succ = block->successors[0]; \
succ != NULL; \
succ = (succ == block->successors[1] ? NULL : \
block->successors[1]))
#define vir_for_each_inst(inst, block) \
list_for_each_entry(struct qinst, inst, &block->instructions, link)
#define vir_for_each_inst_rev(inst, block) \
list_for_each_entry_rev(struct qinst, inst, &block->instructions, link)
#define vir_for_each_inst_safe(inst, block) \
list_for_each_entry_safe(struct qinst, inst, &block->instructions, link)
#define vir_for_each_inst_inorder(inst, c) \
vir_for_each_block(_block, c) \
vir_for_each_inst(inst, _block)
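/* Usage sketch for the iterators (hypothetical snippet):
 *
 *     uint32_t count = 0;
 *     vir_for_each_inst_inorder(inst, c)
 *             count++;
 */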
#endif /* V3D_COMPILER_H */


@@ -0,0 +1,176 @@
/*
* Copyright © 2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
/**
* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
* intrinsics into something amenable to the V3D architecture.
*
* Currently, it splits VS inputs and uniforms into scalars, drops any
* non-position outputs in coordinate shaders, and fixes up the addressing on
* indirect uniform loads. FS input and VS output scalarization is handled by
* nir_lower_io_to_scalar().
*/
static void
replace_intrinsic_with_vec(nir_builder *b, nir_intrinsic_instr *intr,
nir_ssa_def **comps)
{
/* Batch things back together into a vector. This will get split by
* the later ALU scalarization pass.
*/
nir_ssa_def *vec = nir_vec(b, comps, intr->num_components);
/* Replace the old intrinsic with a reference to our reconstructed
* vector.
*/
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
nir_instr_remove(&intr->instr);
}
static void
v3d_nir_lower_output(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
nir_variable *output_var = NULL;
nir_foreach_variable(var, &c->s->outputs) {
if (var->data.driver_location == nir_intrinsic_base(intr)) {
output_var = var;
break;
}
}
assert(output_var);
if (c->vs_key) {
int slot = output_var->data.location;
bool used = false;
switch (slot) {
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_POS:
used = true;
break;
default:
for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
if (v3d_slot_get_slot(c->vs_key->fs_inputs[i]) == slot) {
used = true;
break;
}
}
break;
}
if (!used)
nir_instr_remove(&intr->instr);
}
}
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
/* Generate scalar loads equivalent to the original vector. */
nir_ssa_def *dests[4];
for (unsigned i = 0; i < intr->num_components; i++) {
nir_intrinsic_instr *intr_comp =
nir_intrinsic_instr_create(c->s, intr->intrinsic);
intr_comp->num_components = 1;
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
/* Convert the uniform offset to bytes. If it happens
* to be a constant, constant-folding will clean up
* the shift for us.
*/
nir_intrinsic_set_base(intr_comp,
nir_intrinsic_base(intr) * 16 +
i * 4);
intr_comp->src[0] =
nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
nir_imm_int(b, 4)));
dests[i] = &intr_comp->dest.ssa;
nir_builder_instr_insert(b, &intr_comp->instr);
}
replace_intrinsic_with_vec(b, intr, dests);
}
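/* Example of the rewrite above on a vec4 load (illustrative SSA
 * numbering).  Before:
 *
 *     vec4 32 ssa_2 = intrinsic load_uniform (ssa_1) (base=3)
 *
 * After: one scalar load per component, with the base scaled to bytes
 * (3 * 16 + i * 4) and the indirect offset shifted into bytes, then a
 * vec4 that regathers the components for later scalarization:
 *
 *     ssa_3 = ishl ssa_1, 4
 *     vec1 32 ssa_4 = intrinsic load_uniform (ssa_3) (base=48)
 *     ...
 *     vec4 32 ssa_8 = vec4 ssa_4, ssa_5, ssa_6, ssa_7
 */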
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
struct nir_instr *instr)
{
if (instr->type != nir_instr_type_intrinsic)
return;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
break;
case nir_intrinsic_store_output:
v3d_nir_lower_output(c, b, intr);
break;
case nir_intrinsic_load_uniform:
v3d_nir_lower_uniform(c, b, intr);
break;
case nir_intrinsic_load_user_clip_plane:
default:
break;
}
}
static bool
v3d_nir_lower_io_impl(struct v3d_compile *c, nir_function_impl *impl)
{
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block)
v3d_nir_lower_io_instr(c, &b, instr);
}
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
return true;
}
void
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
nir_foreach_function(function, s) {
if (function->impl)
v3d_nir_lower_io_impl(c, function->impl);
}
}

src/broadcom/compiler/vir.c (new file, 907 lines)

@@ -0,0 +1,907 @@
/*
* Copyright © 2016-2017 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "v3d_compiler.h"
int
vir_get_non_sideband_nsrc(struct qinst *inst)
{
switch (inst->qpu.type) {
case V3D_QPU_INSTR_TYPE_BRANCH:
return 0;
case V3D_QPU_INSTR_TYPE_ALU:
if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
else
return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
}
return 0;
}
int
vir_get_nsrc(struct qinst *inst)
{
int nsrc = vir_get_non_sideband_nsrc(inst);
if (vir_has_implicit_uniform(inst))
nsrc++;
return nsrc;
}
bool
vir_has_implicit_uniform(struct qinst *inst)
{
switch (inst->qpu.type) {
case V3D_QPU_INSTR_TYPE_BRANCH:
return true;
case V3D_QPU_INSTR_TYPE_ALU:
switch (inst->dst.file) {
case QFILE_TLBU:
return true;
default:
return inst->has_implicit_uniform;
}
}
return false;
}
/* The sideband uniform for textures gets stored after the normal ALU
* arguments.
*/
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
return vir_get_nsrc(inst) - 1;
}
/**
* Returns whether the instruction has any side effects that must be
* preserved.
*/
bool
vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
{
switch (inst->qpu.type) {
case V3D_QPU_INSTR_TYPE_BRANCH:
return true;
case V3D_QPU_INSTR_TYPE_ALU:
switch (inst->qpu.alu.add.op) {
case V3D_QPU_A_SETREVF:
case V3D_QPU_A_SETMSF:
case V3D_QPU_A_VPMSETUP:
return true;
default:
break;
}
switch (inst->qpu.alu.mul.op) {
case V3D_QPU_M_MULTOP:
return true;
default:
break;
}
}
if (inst->qpu.sig.ldtmu)
return true;
return false;
}
bool
vir_is_float_input(struct qinst *inst)
{
/* XXX: More instrs */
switch (inst->qpu.type) {
case V3D_QPU_INSTR_TYPE_BRANCH:
return false;
case V3D_QPU_INSTR_TYPE_ALU:
switch (inst->qpu.alu.add.op) {
case V3D_QPU_A_FADD:
case V3D_QPU_A_FSUB:
case V3D_QPU_A_FMIN:
case V3D_QPU_A_FMAX:
case V3D_QPU_A_FTOIN:
return true;
default:
break;
}
switch (inst->qpu.alu.mul.op) {
case V3D_QPU_M_FMOV:
case V3D_QPU_M_VFMUL:
case V3D_QPU_M_FMUL:
return true;
default:
break;
}
}
return false;
}
bool
vir_is_raw_mov(struct qinst *inst)
{
if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
(inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
return false;
}
if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
return false;
}
if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
inst->qpu.flags.mc != V3D_QPU_COND_NONE)
return false;
return true;
}
bool
vir_is_add(struct qinst *inst)
{
return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
inst->qpu.alu.add.op != V3D_QPU_A_NOP);
}
bool
vir_is_mul(struct qinst *inst)
{
return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
}
bool
vir_is_tex(struct qinst *inst)
{
if (inst->dst.file == QFILE_MAGIC)
return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
return false;
}
bool
vir_depends_on_flags(struct qinst *inst)
{
if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS);
} else {
return (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
inst->qpu.flags.mc != V3D_QPU_COND_NONE);
}
}
bool
vir_writes_r3(struct qinst *inst)
{
for (int i = 0; i < vir_get_nsrc(inst); i++) {
switch (inst->src[i].file) {
case QFILE_VARY:
case QFILE_VPM:
return true;
default:
break;
}
}
return false;
}
bool
vir_writes_r4(struct qinst *inst)
{
switch (inst->dst.file) {
case QFILE_MAGIC:
switch (inst->dst.index) {
case V3D_QPU_WADDR_RECIP:
case V3D_QPU_WADDR_RSQRT:
case V3D_QPU_WADDR_EXP:
case V3D_QPU_WADDR_LOG:
case V3D_QPU_WADDR_SIN:
return true;
}
break;
default:
break;
}
if (inst->qpu.sig.ldtmu)
return true;
return false;
}
void
vir_set_unpack(struct qinst *inst, int src,
enum v3d_qpu_input_unpack unpack)
{
assert(src == 0 || src == 1);
if (vir_is_add(inst)) {
if (src == 0)
inst->qpu.alu.add.a_unpack = unpack;
else
inst->qpu.alu.add.b_unpack = unpack;
} else {
assert(vir_is_mul(inst));
if (src == 0)
inst->qpu.alu.mul.a_unpack = unpack;
else
inst->qpu.alu.mul.b_unpack = unpack;
}
}
void
vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
{
if (vir_is_add(inst)) {
inst->qpu.flags.ac = cond;
} else {
assert(vir_is_mul(inst));
inst->qpu.flags.mc = cond;
}
}
void
vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
{
if (vir_is_add(inst)) {
inst->qpu.flags.apf = pf;
} else {
assert(vir_is_mul(inst));
inst->qpu.flags.mpf = pf;
}
}
#if 0
uint8_t
vir_channels_written(struct qinst *inst)
{
if (vir_is_mul(inst)) {
switch (inst->dst.pack) {
case QPU_PACK_MUL_NOP:
case QPU_PACK_MUL_8888:
return 0xf;
case QPU_PACK_MUL_8A:
return 0x1;
case QPU_PACK_MUL_8B:
return 0x2;
case QPU_PACK_MUL_8C:
return 0x4;
case QPU_PACK_MUL_8D:
return 0x8;
}
} else {
switch (inst->dst.pack) {
case QPU_PACK_A_NOP:
case QPU_PACK_A_8888:
case QPU_PACK_A_8888_SAT:
case QPU_PACK_A_32_SAT:
return 0xf;
case QPU_PACK_A_8A:
case QPU_PACK_A_8A_SAT:
return 0x1;
case QPU_PACK_A_8B:
case QPU_PACK_A_8B_SAT:
return 0x2;
case QPU_PACK_A_8C:
case QPU_PACK_A_8C_SAT:
return 0x4;
case QPU_PACK_A_8D:
case QPU_PACK_A_8D_SAT:
return 0x8;
case QPU_PACK_A_16A:
case QPU_PACK_A_16A_SAT:
return 0x3;
case QPU_PACK_A_16B:
case QPU_PACK_A_16B_SAT:
return 0xc;
}
}
unreachable("Bad pack field");
}
#endif
struct qreg
vir_get_temp(struct v3d_compile *c)
{
struct qreg reg;
reg.file = QFILE_TEMP;
reg.index = c->num_temps++;
if (c->num_temps > c->defs_array_size) {
uint32_t old_size = c->defs_array_size;
c->defs_array_size = MAX2(old_size * 2, 16);
c->defs = reralloc(c, c->defs, struct qinst *,
c->defs_array_size);
memset(&c->defs[old_size], 0,
sizeof(c->defs[0]) * (c->defs_array_size - old_size));
}
return reg;
}
struct qinst *
vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
struct qinst *inst = calloc(1, sizeof(*inst));
inst->qpu = v3d_qpu_nop();
inst->qpu.alu.add.op = op;
inst->dst = dst;
inst->src[0] = src0;
inst->src[1] = src1;
inst->uniform = ~0;
return inst;
}
struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
struct qinst *inst = calloc(1, sizeof(*inst));
inst->qpu = v3d_qpu_nop();
inst->qpu.alu.mul.op = op;
inst->dst = dst;
inst->src[0] = src0;
inst->src[1] = src1;
inst->uniform = ~0;
return inst;
}
struct qinst *
vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
{
struct qinst *inst = calloc(1, sizeof(*inst));
inst->qpu = v3d_qpu_nop();
inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
inst->qpu.branch.cond = cond;
inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
inst->qpu.branch.ub = true;
inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
inst->dst = vir_reg(QFILE_NULL, 0);
inst->src[0] = src;
inst->uniform = ~0;
return inst;
}
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
list_addtail(&inst->link, &c->cur_block->instructions);
if (inst->dst.file == QFILE_MAGIC &&
inst->dst.index == V3D_QPU_WADDR_VPM)
c->num_vpm_writes++;
}
/* Updates inst to write to a new temporary, emits it, and notes the def. */
struct qreg
vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
assert(inst->dst.file == QFILE_NULL);
inst->dst = vir_get_temp(c);
if (inst->dst.file == QFILE_TEMP)
c->defs[inst->dst.index] = inst;
vir_emit(c, inst);
return inst->dst;
}
struct qinst *
vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
{
if (inst->dst.file == QFILE_TEMP)
c->defs[inst->dst.index] = NULL;
vir_emit(c, inst);
return inst;
}
struct qblock *
vir_new_block(struct v3d_compile *c)
{
struct qblock *block = rzalloc(c, struct qblock);
list_inithead(&block->instructions);
block->predecessors = _mesa_set_create(block,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
block->index = c->next_block_index++;
return block;
}
void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
c->cur_block = block;
list_addtail(&block->link, &c->blocks);
}
struct qblock *
vir_entry_block(struct v3d_compile *c)
{
return list_first_entry(&c->blocks, struct qblock, link);
}
struct qblock *
vir_exit_block(struct v3d_compile *c)
{
return list_last_entry(&c->blocks, struct qblock, link);
}
void
vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
{
_mesa_set_add(successor->predecessors, predecessor);
if (predecessor->successors[0]) {
assert(!predecessor->successors[1]);
predecessor->successors[1] = successor;
} else {
predecessor->successors[0] = successor;
}
}
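/* Sketch: wiring up a two-way branch with the helpers above
 * (hypothetical blocks; condition setup elided):
 *
 *     struct qblock *then_block = vir_new_block(c);
 *     struct qblock *after_block = vir_new_block(c);
 *     vir_link_blocks(c->cur_block, then_block);
 *     vir_link_blocks(c->cur_block, after_block);
 *     vir_set_emit_block(c, then_block);
 *     ...emit the then side...
 *     vir_link_blocks(c->cur_block, after_block);
 *     vir_set_emit_block(c, after_block);
 */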
const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info *devinfo)
{
struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
if (!compiler)
return NULL;
compiler->devinfo = devinfo;
if (!vir_init_reg_sets(compiler)) {
ralloc_free(compiler);
return NULL;
}
return compiler;
}
void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
ralloc_free((void *)compiler);
}
static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
struct v3d_key *key,
nir_shader *s,
int program_id, int variant_id)
{
struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
c->compiler = compiler;
c->devinfo = compiler->devinfo;
c->key = key;
c->program_id = program_id;
c->variant_id = variant_id;
s = nir_shader_clone(c, s);
c->s = s;
list_inithead(&c->blocks);
vir_set_emit_block(c, vir_new_block(c));
c->output_position_index = -1;
c->output_point_size_index = -1;
c->output_sample_mask_index = -1;
c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
_mesa_key_pointer_equal);
return c;
}
static void
v3d_lower_nir(struct v3d_compile *c)
{
struct nir_lower_tex_options tex_options = {
.lower_rect = false, /* XXX */
.lower_txp = ~0,
/* Apply swizzles to all samplers. */
.swizzle_result = ~0,
};
/* Lower the format swizzle and (for 32-bit returns)
* ARB_texture_swizzle-style swizzle.
*/
for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
for (int j = 0; j < 4; j++)
tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
}
NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
}
static void
v3d_lower_nir_late(struct v3d_compile *c)
{
NIR_PASS_V(c->s, v3d_nir_lower_io, c);
NIR_PASS_V(c->s, nir_lower_idiv);
}
static void
v3d_set_prog_data_uniforms(struct v3d_compile *c,
struct v3d_prog_data *prog_data)
{
int count = c->num_uniforms;
struct v3d_uniform_list *ulist = &prog_data->uniforms;
ulist->count = count;
ulist->data = ralloc_array(prog_data, uint32_t, count);
memcpy(ulist->data, c->uniform_data,
count * sizeof(*ulist->data));
ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
memcpy(ulist->contents, c->uniform_contents,
count * sizeof(*ulist->contents));
}
/* Copy the compiler UBO range state to the compiled shader, dropping out
* arrays that were never referenced by an indirect load.
*
* (Note that VIR dead code elimination of an array access still leaves that
* array alive, though)
*/
static void
v3d_set_prog_data_ubo(struct v3d_compile *c,
struct v3d_prog_data *prog_data)
{
if (!c->num_ubo_ranges)
return;
prog_data->num_ubo_ranges = 0;
prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
c->num_ubo_ranges);
for (int i = 0; i < c->num_ubo_ranges; i++) {
if (!c->ubo_range_used[i])
continue;
struct v3d_ubo_range *range = &c->ubo_ranges[i];
prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
prog_data->ubo_size += range->size;
}
if (prog_data->ubo_size) {
if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
vir_get_stage_name(c),
c->program_id, c->variant_id,
prog_data->ubo_size / 4);
}
}
}
static void
v3d_set_prog_data(struct v3d_compile *c,
struct v3d_prog_data *prog_data)
{
v3d_set_prog_data_uniforms(c, prog_data);
v3d_set_prog_data_ubo(c, prog_data);
}
static uint64_t *
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
{
*final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
uint64_t *qpu_insts = malloc(*final_assembly_size);
if (!qpu_insts)
return NULL;
memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
vir_compile_destroy(c);
return qpu_insts;
}
uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
struct v3d_vs_key *key,
struct v3d_vs_prog_data *prog_data,
nir_shader *s,
int program_id, int variant_id,
uint32_t *final_assembly_size)
{
struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
program_id, variant_id);
c->vs_key = key;
v3d_lower_nir(c);
if (key->clamp_color)
NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
if (key->base.ucp_enables) {
NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
NIR_PASS_V(c->s, nir_lower_io_to_scalar,
nir_var_shader_out);
}
/* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
v3d_lower_nir_late(c);
v3d_optimize_nir(c->s);
NIR_PASS_V(c->s, nir_convert_from_ssa, true);
v3d_nir_to_vir(c);
v3d_set_prog_data(c, &prog_data->base);
prog_data->base.num_inputs = c->num_inputs;
/* The vertex data gets format converted by the VPM so that
* each attribute channel takes up a VPM column. Precompute
* the sizes for the shader record.
*/
for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
prog_data->vattr_sizes[i] = c->vattr_sizes[i];
prog_data->vpm_input_size += c->vattr_sizes[i];
}
/* Input/output segment sizes are in 8x32-bit multiples. */
prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
prog_data->uses_vid = (s->info.system_values_read &
(1ull << SYSTEM_VALUE_VERTEX_ID));
prog_data->uses_iid = (s->info.system_values_read &
(1ull << SYSTEM_VALUE_INSTANCE_ID));
return v3d_return_qpu_insts(c, final_assembly_size);
}
static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
struct v3d_fs_prog_data *prog_data)
{
prog_data->base.num_inputs = c->num_inputs;
memcpy(prog_data->input_slots, c->input_slots,
c->num_inputs * sizeof(*c->input_slots));
for (int i = 0; i < c->num_inputs; i++) {
struct v3d_varying_slot v3d_slot = c->input_slots[i];
uint8_t slot = v3d_slot_get_slot(v3d_slot);
if (slot == VARYING_SLOT_COL0 ||
slot == VARYING_SLOT_COL1 ||
slot == VARYING_SLOT_BFC0 ||
slot == VARYING_SLOT_BFC1) {
BITSET_SET(prog_data->color_inputs, i);
}
if (BITSET_TEST(c->flat_shade_flags, i))
BITSET_SET(prog_data->flat_shade_flags, i);
}
}
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
struct v3d_fs_key *key,
struct v3d_fs_prog_data *prog_data,
nir_shader *s,
int program_id, int variant_id,
uint32_t *final_assembly_size)
{
struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
program_id, variant_id);
c->fs_key = key;
v3d_lower_nir(c);
if (key->light_twoside)
NIR_PASS_V(c->s, nir_lower_two_sided_color);
if (key->clamp_color)
NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
if (key->alpha_test) {
NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
false);
}
if (key->base.ucp_enables)
NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);
/* Note: FS input scalarizing must happen after
* nir_lower_two_sided_color, which only handles a vec4 at a time.
*/
NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
v3d_lower_nir_late(c);
v3d_optimize_nir(c->s);
NIR_PASS_V(c->s, nir_convert_from_ssa, true);
v3d_nir_to_vir(c);
v3d_set_prog_data(c, &prog_data->base);
v3d_set_fs_prog_data_inputs(c, prog_data);
if (c->s->info.outputs_written & (1 << FRAG_RESULT_DEPTH))
prog_data->writes_z = true;
return v3d_return_qpu_insts(c, final_assembly_size);
}
void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
if (qinst->dst.file == QFILE_TEMP)
c->defs[qinst->dst.index] = NULL;
list_del(&qinst->link);
free(qinst);
}
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
/* XXX
int pack = reg.pack;
while (reg.file == QFILE_TEMP &&
c->defs[reg.index] &&
(c->defs[reg.index]->op == QOP_MOV ||
c->defs[reg.index]->op == QOP_FMOV) &&
!c->defs[reg.index]->dst.pack &&
!c->defs[reg.index]->src[0].pack) {
reg = c->defs[reg.index]->src[0];
}
reg.pack = pack;
*/
return reg;
}
void
vir_compile_destroy(struct v3d_compile *c)
{
vir_for_each_block(block, c) {
while (!list_empty(&block->instructions)) {
struct qinst *qinst =
list_first_entry(&block->instructions,
struct qinst, link);
vir_remove_instruction(c, qinst);
}
}
ralloc_free(c);
}
struct qreg
vir_uniform(struct v3d_compile *c,
enum quniform_contents contents,
uint32_t data)
{
for (int i = 0; i < c->num_uniforms; i++) {
if (c->uniform_contents[i] == contents &&
c->uniform_data[i] == data) {
return vir_reg(QFILE_UNIF, i);
}
}
uint32_t uniform = c->num_uniforms++;
if (uniform >= c->uniform_array_size) {
c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
c->uniform_array_size * 2);
c->uniform_data = reralloc(c, c->uniform_data,
uint32_t,
c->uniform_array_size);
c->uniform_contents = reralloc(c, c->uniform_contents,
enum quniform_contents,
c->uniform_array_size);
}
c->uniform_contents[uniform] = contents;
c->uniform_data[uniform] = data;
return vir_reg(QFILE_UNIF, uniform);
}
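/* Dedup example: both requests below return the same QFILE_UNIF index,
 * so 0.5f is stored in the uniform stream only once:
 *
 *     struct qreg a = vir_uniform(c, QUNIFORM_CONSTANT, fui(0.5f));
 *     struct qreg b = vir_uniform(c, QUNIFORM_CONSTANT, fui(0.5f));
 *     assert(a.index == b.index);
 */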
void
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
{
struct qinst *last_inst = NULL;
if (!list_empty(&c->cur_block->instructions))
last_inst = (struct qinst *)c->cur_block->instructions.prev;
if (src.file != QFILE_TEMP ||
!c->defs[src.index] ||
last_inst != c->defs[src.index]) {
/* XXX: Make the MOV be the appropriate type */
last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
last_inst = (struct qinst *)c->cur_block->instructions.prev;
}
vir_set_pf(last_inst, pf);
}
#define OPTPASS(func) \
do { \
bool stage_progress = func(c); \
if (stage_progress) { \
progress = true; \
if (print_opt_debug) { \
fprintf(stderr, \
"VIR opt pass %2d: %s progress\n", \
pass, #func); \
} \
/*XXX vir_validate(c);*/ \
} \
} while (0)
void
vir_optimize(struct v3d_compile *c)
{
bool print_opt_debug = false;
int pass = 1;
while (true) {
bool progress = false;
OPTPASS(vir_opt_copy_propagate);
OPTPASS(vir_opt_dead_code);
if (!progress)
break;
pass++;
}
}
const char *
vir_get_stage_name(struct v3d_compile *c)
{
if (c->vs_key && c->vs_key->is_coord)
return "MESA_SHADER_COORD";
else
return gl_shader_stage_name(c->s->stage);
}


@@ -0,0 +1,339 @@
/*
* Copyright © 2016-2017 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "v3d_compiler.h"
static void
vir_print_reg(struct v3d_compile *c, struct qreg reg)
{
static const char *files[] = {
[QFILE_TEMP] = "t",
[QFILE_VARY] = "v",
[QFILE_UNIF] = "u",
[QFILE_TLB] = "tlb",
[QFILE_TLBU] = "tlbu",
};
static const char *quniform_names[] = {
[QUNIFORM_VIEWPORT_X_SCALE] = "vp_x_scale",
[QUNIFORM_VIEWPORT_Y_SCALE] = "vp_y_scale",
[QUNIFORM_VIEWPORT_Z_OFFSET] = "vp_z_offset",
[QUNIFORM_VIEWPORT_Z_SCALE] = "vp_z_scale",
};
switch (reg.file) {
case QFILE_NULL:
fprintf(stderr, "null");
break;
case QFILE_LOAD_IMM:
fprintf(stderr, "0x%08x (%f)", reg.index, uif(reg.index));
break;
case QFILE_REG:
fprintf(stderr, "rf%d", reg.index);
break;
case QFILE_MAGIC:
fprintf(stderr, "%s", v3d_qpu_magic_waddr_name(reg.index));
break;
case QFILE_SMALL_IMM:
if ((int)reg.index >= -16 && (int)reg.index <= 15)
fprintf(stderr, "%d", reg.index);
else
fprintf(stderr, "%f", uif(reg.index));
break;
case QFILE_VPM:
fprintf(stderr, "vpm%d.%d",
reg.index / 4, reg.index % 4);
break;
case QFILE_TLB:
fprintf(stderr, "%s", files[reg.file]);
break;
case QFILE_UNIF: {
enum quniform_contents contents = c->uniform_contents[reg.index];
fprintf(stderr, "%s%d", files[reg.file], reg.index);
switch (contents) {
case QUNIFORM_CONSTANT:
fprintf(stderr, " (0x%08x / %f)",
c->uniform_data[reg.index],
uif(c->uniform_data[reg.index]));
break;
case QUNIFORM_UNIFORM:
fprintf(stderr, " (push[%d])",
c->uniform_data[reg.index]);
break;
case QUNIFORM_TEXTURE_CONFIG_P1:
fprintf(stderr, " (tex[%d].p1)",
c->uniform_data[reg.index]);
break;
case QUNIFORM_TEXTURE_WIDTH:
fprintf(stderr, " (tex[%d].width)",
c->uniform_data[reg.index]);
break;
case QUNIFORM_TEXTURE_HEIGHT:
fprintf(stderr, " (tex[%d].height)",
c->uniform_data[reg.index]);
break;
case QUNIFORM_TEXTURE_DEPTH:
fprintf(stderr, " (tex[%d].depth)",
c->uniform_data[reg.index]);
break;
case QUNIFORM_TEXTURE_ARRAY_SIZE:
fprintf(stderr, " (tex[%d].array_size)",
c->uniform_data[reg.index]);
break;
case QUNIFORM_TEXTURE_LEVELS:
fprintf(stderr, " (tex[%d].levels)",
c->uniform_data[reg.index]);
break;
case QUNIFORM_UBO_ADDR:
fprintf(stderr, " (ubo[%d])",
c->uniform_data[reg.index]);
break;
default:
if (quniform_contents_is_texture_p0(contents)) {
fprintf(stderr, " (tex[%d].p0: 0x%08x)",
contents - QUNIFORM_TEXTURE_CONFIG_P0_0,
c->uniform_data[reg.index]);
} else if (contents < ARRAY_SIZE(quniform_names)) {
fprintf(stderr, " (%s)",
quniform_names[contents]);
} else {
fprintf(stderr, " (%d / 0x%08x)", contents,
c->uniform_data[reg.index]);
}
}
break;
}
default:
fprintf(stderr, "%s%d", files[reg.file], reg.index);
break;
}
}
static void
vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
{
struct v3d_qpu_sig *sig = &inst->qpu.sig;
if (sig->thrsw)
fprintf(stderr, "; thrsw");
if (sig->ldvary)
fprintf(stderr, "; ldvary");
if (sig->ldvpm)
fprintf(stderr, "; ldvpm");
if (sig->ldtmu)
fprintf(stderr, "; ldtmu");
if (sig->ldunif)
fprintf(stderr, "; ldunif");
if (sig->wrtmuc)
fprintf(stderr, "; wrtmuc");
}
static void
vir_dump_alu(struct v3d_compile *c, struct qinst *inst)
{
struct v3d_qpu_instr *instr = &inst->qpu;
int nsrc = vir_get_non_sideband_nsrc(inst);
int sideband_nsrc = vir_get_nsrc(inst);
enum v3d_qpu_input_unpack unpack[2];
if (inst->qpu.alu.add.op != V3D_QPU_A_NOP) {
fprintf(stderr, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.ac));
fprintf(stderr, "%s", v3d_qpu_pf_name(instr->flags.apf));
fprintf(stderr, "%s", v3d_qpu_uf_name(instr->flags.auf));
fprintf(stderr, " ");
vir_print_reg(c, inst->dst);
fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.add.output_pack));
unpack[0] = instr->alu.add.a_unpack;
unpack[1] = instr->alu.add.b_unpack;
} else {
fprintf(stderr, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.mc));
fprintf(stderr, "%s", v3d_qpu_pf_name(instr->flags.mpf));
fprintf(stderr, "%s", v3d_qpu_uf_name(instr->flags.muf));
fprintf(stderr, " ");
vir_print_reg(c, inst->dst);
fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.mul.output_pack));
unpack[0] = instr->alu.mul.a_unpack;
unpack[1] = instr->alu.mul.b_unpack;
}
for (int i = 0; i < sideband_nsrc; i++) {
fprintf(stderr, ", ");
vir_print_reg(c, inst->src[i]);
if (i < nsrc)
fprintf(stderr, "%s", v3d_qpu_unpack_name(unpack[i]));
}
vir_dump_sig(c, inst);
}
void
vir_dump_inst(struct v3d_compile *c, struct qinst *inst)
{
struct v3d_qpu_instr *instr = &inst->qpu;
switch (inst->qpu.type) {
case V3D_QPU_INSTR_TYPE_ALU:
vir_dump_alu(c, inst);
break;
case V3D_QPU_INSTR_TYPE_BRANCH:
fprintf(stderr, "b");
if (instr->branch.ub)
fprintf(stderr, "u");
fprintf(stderr, "%s",
v3d_qpu_branch_cond_name(instr->branch.cond));
fprintf(stderr, "%s", v3d_qpu_msfign_name(instr->branch.msfign));
switch (instr->branch.bdi) {
case V3D_QPU_BRANCH_DEST_ABS:
fprintf(stderr, " zero_addr+0x%08x", instr->branch.offset);
break;
case V3D_QPU_BRANCH_DEST_REL:
fprintf(stderr, " %d", instr->branch.offset);
break;
case V3D_QPU_BRANCH_DEST_LINK_REG:
fprintf(stderr, " lri");
break;
case V3D_QPU_BRANCH_DEST_REGFILE:
fprintf(stderr, " rf%d", instr->branch.raddr_a);
break;
}
if (instr->branch.ub) {
switch (instr->branch.bdu) {
case V3D_QPU_BRANCH_DEST_ABS:
fprintf(stderr, ", a:unif");
break;
case V3D_QPU_BRANCH_DEST_REL:
fprintf(stderr, ", r:unif");
break;
case V3D_QPU_BRANCH_DEST_LINK_REG:
fprintf(stderr, ", lri");
break;
case V3D_QPU_BRANCH_DEST_REGFILE:
fprintf(stderr, ", rf%d", instr->branch.raddr_a);
break;
}
}
if (vir_has_implicit_uniform(inst)) {
fprintf(stderr, " ");
vir_print_reg(c, inst->src[vir_get_implicit_uniform_src(inst)]);
}
break;
}
}
void
vir_dump(struct v3d_compile *c)
{
int ip = 0;
vir_for_each_block(block, c) {
fprintf(stderr, "BLOCK %d:\n", block->index);
vir_for_each_inst(inst, block) {
if (c->temp_start) {
bool first = true;
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] != ip)
continue;
if (first) {
first = false;
} else {
fprintf(stderr, ", ");
}
fprintf(stderr, "S%4d", i);
}
if (first)
fprintf(stderr, "      ");
else
fprintf(stderr, " ");
}
if (c->temp_end) {
bool first = true;
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_end[i] != ip)
continue;
if (first) {
first = false;
} else {
fprintf(stderr, ", ");
}
fprintf(stderr, "E%4d", i);
}
if (first)
fprintf(stderr, "      ");
else
fprintf(stderr, " ");
}
vir_dump_inst(c, inst);
fprintf(stderr, "\n");
ip++;
}
if (block->successors[1]) {
fprintf(stderr, "-> BLOCK %d, %d\n",
block->successors[0]->index,
block->successors[1]->index);
} else if (block->successors[0]) {
fprintf(stderr, "-> BLOCK %d\n",
block->successors[0]->index);
}
}
}
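/* Reader's note: the "S%4d"/"E%4d" columns above mark the temps whose live
* intervals start or end at that instruction, and only appear once
* vir_calculate_live_intervals() has populated temp_start/temp_end.
*/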

View File

@ -0,0 +1,340 @@
/*
* Copyright © 2012 Intel Corporation
* Copyright © 2016 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#define MAX_INSTRUCTION (1 << 30)
#include "util/ralloc.h"
#include "util/register_allocate.h"
#include "v3d_compiler.h"
struct partial_update_state {
struct qinst *insts[4];
uint8_t channels;
};
static uint32_t
int_hash(const void *key)
{
return _mesa_hash_data(key, sizeof(int));
}
static bool
int_compare(const void *key1, const void *key2)
{
return *(const int *)key1 == *(const int *)key2;
}
static int
vir_reg_to_var(struct qreg reg)
{
if (reg.file == QFILE_TEMP)
return reg.index;
return -1;
}
static void
vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
struct qreg src)
{
int var = vir_reg_to_var(src);
if (var == -1)
return;
c->temp_start[var] = MIN2(c->temp_start[var], ip);
c->temp_end[var] = MAX2(c->temp_end[var], ip);
/* The use[] bitset marks when the block makes
* use of a variable without having completely
* defined that variable within the block.
*/
if (!BITSET_TEST(block->def, var))
BITSET_SET(block->use, var);
}
static struct partial_update_state *
get_partial_update_state(struct hash_table *partial_update_ht,
struct qinst *inst)
{
struct hash_entry *entry =
_mesa_hash_table_search(partial_update_ht,
&inst->dst.index);
if (entry)
return entry->data;
struct partial_update_state *state =
rzalloc(partial_update_ht, struct partial_update_state);
_mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
return state;
}
static void
vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
struct hash_table *partial_update_ht, struct qinst *inst)
{
if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
return;
/* The def[] bitset marks when an initialization in a
* block completely screens off previous updates of
* that variable.
*/
int var = vir_reg_to_var(inst->dst);
if (var == -1)
return;
c->temp_start[var] = MIN2(c->temp_start[var], ip);
c->temp_end[var] = MAX2(c->temp_end[var], ip);
/* If we've already tracked this as a def, or already used it within
* the block, there's nothing to do.
*/
if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
return;
/* Easy, common case: unconditional full register update.
*
* We treat conditioning on the exec mask as the same as not being
* conditional. This makes sure that if the register gets set on
* either side of an if, it is treated as being screened off before
* the if. Otherwise, if there was no intervening def, its live
* interval doesn't extend back to the start of the program, and if too
* many registers did that we'd fail to register allocate.
*/
if (((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
inst->qpu.flags.mc == V3D_QPU_COND_NONE) ||
inst->cond_is_exec_mask) &&
inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
BITSET_SET(block->def, var);
return;
}
/* Finally, look at the condition code and packing and mark it as a
* def. We need to make sure that we understand sequences of
* instructions like:
*
* mov.zs t0, t1
* mov.zc t0, t2
*
* or:
*
* mmov t0.8a, t1
* mmov t0.8b, t2
* mmov t0.8c, t3
* mmov t0.8d, t4
*
* as defining the temp within the block, because otherwise dst's live
* range will get extended up the control flow to the top of the
* program.
*/
struct partial_update_state *state =
get_partial_update_state(partial_update_ht, inst);
uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
state->channels |= mask;
} else {
for (int i = 0; i < 4; i++) {
if (!(mask & (1 << i)))
continue;
/* XXXif (state->insts[i] &&
state->insts[i]->cond ==
qpu_cond_complement(inst->cond))
state->channels |= 1 << i;
else
*/
state->insts[i] = inst;
}
}
if (state->channels == 0xf)
BITSET_SET(block->def, var);
}
static void
sf_state_clear(struct hash_table *partial_update_ht)
{
struct hash_entry *entry;
hash_table_foreach(partial_update_ht, entry) {
struct partial_update_state *state = entry->data;
for (int i = 0; i < 4; i++) {
if (state->insts[i] &&
(state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
state->insts[i] = NULL;
}
}
}
/* Sets up the def/use arrays for when variables are used-before-defined or
* defined-before-used in the block.
*
* Also initializes the temp_start/temp_end to cover just the instruction IPs
* where the variable is used, which will be extended later in
* vir_compute_start_end().
*/
static void
vir_setup_def_use(struct v3d_compile *c)
{
struct hash_table *partial_update_ht =
_mesa_hash_table_create(c, int_hash, int_compare);
int ip = 0;
vir_for_each_block(block, c) {
block->start_ip = ip;
_mesa_hash_table_clear(partial_update_ht, NULL);
vir_for_each_inst(inst, block) {
for (int i = 0; i < vir_get_nsrc(inst); i++)
vir_setup_use(c, block, ip, inst->src[i]);
vir_setup_def(c, block, ip, partial_update_ht, inst);
if (false /* XXX inst->uf */)
sf_state_clear(partial_update_ht);
/* Payload registers: r0/1/2 contain W, centroid W,
* and Z at program start. Register allocation will
* force their nodes to R0/1/2.
*/
if (inst->src[0].file == QFILE_REG) {
switch (inst->src[0].index) {
case 0:
case 1:
case 2:
c->temp_start[inst->dst.index] = 0;
break;
}
}
ip++;
}
block->end_ip = ip;
}
_mesa_hash_table_destroy(partial_update_ht, NULL);
}
static bool
vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
{
bool cont = false;
vir_for_each_block_rev(block, c) {
/* Update live_out: Any successor using the variable
* on entrance needs us to have the variable live on
* exit.
*/
vir_for_each_successor(succ, block) {
for (int i = 0; i < bitset_words; i++) {
BITSET_WORD new_live_out = (succ->live_in[i] &
~block->live_out[i]);
if (new_live_out) {
block->live_out[i] |= new_live_out;
cont = true;
}
}
}
/* Update live_in */
for (int i = 0; i < bitset_words; i++) {
BITSET_WORD new_live_in = (block->use[i] |
(block->live_out[i] &
~block->def[i]));
if (new_live_in & ~block->live_in[i]) {
block->live_in[i] |= new_live_in;
cont = true;
}
}
}
return cont;
}
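/* In classic dataflow terms, the loop above computes
*
* live_out(b) = UNION over successors s of live_in(s)
* live_in(b) = use(b) | (live_out(b) & ~def(b))
*
* and reports whether anything changed; the caller iterates it to a
* fixed point.
*/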
/**
* Extend the start/end ranges for each variable to account for the
* new information calculated from control flow.
*/
static void
vir_compute_start_end(struct v3d_compile *c, int num_vars)
{
vir_for_each_block(block, c) {
for (int i = 0; i < num_vars; i++) {
if (BITSET_TEST(block->live_in, i)) {
c->temp_start[i] = MIN2(c->temp_start[i],
block->start_ip);
c->temp_end[i] = MAX2(c->temp_end[i],
block->start_ip);
}
if (BITSET_TEST(block->live_out, i)) {
c->temp_start[i] = MIN2(c->temp_start[i],
block->end_ip);
c->temp_end[i] = MAX2(c->temp_end[i],
block->end_ip);
}
}
}
}
void
vir_calculate_live_intervals(struct v3d_compile *c)
{
int bitset_words = BITSET_WORDS(c->num_temps);
/* This is only expected to be called once per compile; rerunning it
* would require freeing the previous arrays first.
*/
assert(!c->temp_start);
c->temp_start = rzalloc_array(c, int, c->num_temps);
c->temp_end = rzalloc_array(c, int, c->num_temps);
for (int i = 0; i < c->num_temps; i++) {
c->temp_start[i] = MAX_INSTRUCTION;
c->temp_end[i] = -1;
}
vir_for_each_block(block, c) {
block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
}
vir_setup_def_use(c);
while (vir_live_variables_dataflow(c, bitset_words))
;
vir_compute_start_end(c, c->num_temps);
}

View File

@ -0,0 +1,209 @@
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file vir_lower_uniforms.c
*
* This is the pre-code-generation pass for fixing up instructions that try to
* read from multiple uniform values.
*/
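/* For example (an illustrative VIR sequence): "add t2, u0, u1" reads two
* different uniform values, but only one uniform can be loaded per
* instruction (the ldunif signal lands in r5), so it has to become
*
* mov t1, u1
* add t2, u0, t1
*
* with the MOV emitted at the top of the block by the code below.
*/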
#include "v3d_compiler.h"
#include "util/hash_table.h"
#include "util/u_math.h"
static inline uint32_t
index_hash(const void *key)
{
return (uintptr_t)key;
}
static inline bool
index_compare(const void *a, const void *b)
{
return a == b;
}
static void
add_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
if (entry) {
entry->data++;
} else {
_mesa_hash_table_insert(ht, key, (void *)(uintptr_t)1);
}
}
static void
remove_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
assert(entry);
entry->data--;
if (entry->data == NULL)
_mesa_hash_table_remove(ht, entry);
}
static bool
is_lowerable_uniform(struct qinst *inst, int i)
{
if (inst->src[i].file != QFILE_UNIF)
return false;
if (vir_has_implicit_uniform(inst))
return i != vir_get_implicit_uniform_src(inst);
return true;
}
/* Returns the number of different uniform values referenced by the
* instruction.
*/
static uint32_t
vir_get_instruction_uniform_count(struct qinst *inst)
{
uint32_t count = 0;
for (int i = 0; i < vir_get_nsrc(inst); i++) {
if (inst->src[i].file != QFILE_UNIF)
continue;
bool is_duplicate = false;
for (int j = 0; j < i; j++) {
if (inst->src[j].file == QFILE_UNIF &&
inst->src[j].index == inst->src[i].index) {
is_duplicate = true;
break;
}
}
if (!is_duplicate)
count++;
}
return count;
}
void
vir_lower_uniforms(struct v3d_compile *c)
{
struct hash_table *ht =
_mesa_hash_table_create(c, index_hash, index_compare);
/* Walk the instruction list, finding which instructions have more
* than one uniform referenced, and add those uniform values to the
* ht.
*/
vir_for_each_inst_inorder(inst, c) {
uint32_t nsrc = vir_get_nsrc(inst);
if (vir_get_instruction_uniform_count(inst) <= 1)
continue;
for (int i = 0; i < nsrc; i++) {
if (is_lowerable_uniform(inst, i))
add_uniform(ht, inst->src[i]);
}
}
while (ht->entries) {
/* Find the most commonly used uniform in instructions that
* need a uniform lowered.
*/
uint32_t max_count = 0;
uint32_t max_index = 0;
struct hash_entry *entry;
hash_table_foreach(ht, entry) {
uint32_t count = (uintptr_t)entry->data;
uint32_t index = (uintptr_t)entry->key - 1;
if (count > max_count) {
max_count = count;
max_index = index;
}
}
struct qreg unif = vir_reg(QFILE_UNIF, max_index);
/* Now, find the instructions using this uniform and make them
* reference a temp instead.
*/
vir_for_each_block(block, c) {
struct qinst *mov = NULL;
vir_for_each_inst(inst, block) {
uint32_t nsrc = vir_get_nsrc(inst);
uint32_t count = vir_get_instruction_uniform_count(inst);
if (count <= 1)
continue;
/* If the block doesn't have a load of the
* uniform yet, add it. We could potentially
* do better and CSE MOVs from multiple blocks
* into dominating blocks, except that may
* cause troubles for register allocation.
*/
if (!mov) {
mov = vir_mul_inst(V3D_QPU_M_MOV,
vir_get_temp(c),
unif, c->undef);
list_add(&mov->link,
&block->instructions);
c->defs[mov->dst.index] = mov;
}
bool removed = false;
for (int i = 0; i < nsrc; i++) {
if (is_lowerable_uniform(inst, i) &&
inst->src[i].index == max_index) {
inst->src[i].file =
mov->dst.file;
inst->src[i].index =
mov->dst.index;
remove_uniform(ht, unif);
removed = true;
}
}
if (removed)
count--;
/* If the instruction doesn't need lowering any more,
* then drop it from the list.
*/
if (count <= 1) {
for (int i = 0; i < nsrc; i++) {
if (is_lowerable_uniform(inst, i))
remove_uniform(ht, inst->src[i]);
}
}
}
}
}
_mesa_hash_table_destroy(ht, NULL);
}

View File

@ -0,0 +1,233 @@
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file vir_opt_copy_propagate.c
*
* This implements simple copy propagation for VIR without control flow.
*
* For each temp, it keeps a qreg of which source it was MOVed from, if it
* was. If we see that used later, we can just reuse the source value, since
* we know we don't have control flow, and we have SSA for our values so
* there's no killing to worry about.
*/
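/* For example (illustrative): given
*
* mov t1, t0
* fadd t2, t1, t1
*
* both reads of t1 are rewritten to t0, leaving a dead MOV behind for
* vir_opt_dead_code to delete.
*/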
#include "v3d_compiler.h"
static bool
is_copy_mov(struct qinst *inst)
{
if (!inst)
return false;
if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
(inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
return false;
}
if (inst->dst.file != QFILE_TEMP)
return false;
if (inst->src[0].file != QFILE_TEMP &&
inst->src[0].file != QFILE_UNIF) {
return false;
}
if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
return false;
}
if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
return false;
}
switch (inst->src[0].file) {
case QFILE_MAGIC:
/* No copy propagating from R3/R4/R5 -- the MOVs from those
* are there to register-allocate the values produced into
* R3/4/5 out to ordinary regs (which will hopefully still get
* colored to r3/4/5).
*/
switch (inst->src[0].index) {
case V3D_QPU_WADDR_R3:
case V3D_QPU_WADDR_R4:
case V3D_QPU_WADDR_R5:
return false;
default:
break;
}
break;
case QFILE_REG:
switch (inst->src[0].index) {
case 0:
case 1:
case 2:
/* MOVs from rf0/1/2 are only to track the live
* intervals for W/centroid W/Z.
*/
return false;
}
break;
default:
break;
}
return true;
}
static bool
vir_has_unpack(struct qinst *inst, int chan)
{
assert(chan == 0 || chan == 1);
if (vir_is_add(inst)) {
if (chan == 0)
return inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE;
else
return inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE;
} else {
if (chan == 0)
return inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE;
else
return inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE;
}
}
static bool
try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
{
bool debug = false;
bool progress = false;
for (int i = 0; i < vir_get_nsrc(inst); i++) {
if (inst->src[i].file != QFILE_TEMP)
continue;
/* We have two ways of finding MOVs we can copy propagate
* from. One is if it's an SSA def: then we can reuse it from
* any block in the program, as long as its source is also an
* SSA def. Alternatively, if it's in the "movs" array
* tracked within the block, then we know the sources for it
* haven't been changed since we saw the instruction within
* our block.
*/
struct qinst *mov = movs[inst->src[i].index];
if (!mov) {
if (!is_copy_mov(c->defs[inst->src[i].index]))
continue;
mov = c->defs[inst->src[i].index];
if (mov->src[0].file == QFILE_TEMP &&
!c->defs[mov->src[0].index])
continue;
}
if (vir_has_unpack(mov, 0)) {
/* Make sure that the meaning of the unpack
* would be the same between the two
* instructions.
*/
if (vir_is_float_input(inst) !=
vir_is_float_input(mov)) {
continue;
}
/* No composing the unpacks. */
if (vir_has_unpack(inst, i))
continue;
}
if (debug) {
fprintf(stderr, "Copy propagate: ");
vir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
inst->src[i] = mov->src[0];
if (vir_has_unpack(mov, 0)) {
enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a_unpack;
vir_set_unpack(inst, i, unpack);
}
if (debug) {
fprintf(stderr, "to: ");
vir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
progress = true;
}
return progress;
}
static void
apply_kills(struct v3d_compile *c, struct qinst **movs, struct qinst *inst)
{
if (inst->dst.file != QFILE_TEMP)
return;
for (int i = 0; i < c->num_temps; i++) {
if (movs[i] &&
(movs[i]->dst.index == inst->dst.index ||
(movs[i]->src[0].file == QFILE_TEMP &&
movs[i]->src[0].index == inst->dst.index))) {
movs[i] = NULL;
}
}
}
bool
vir_opt_copy_propagate(struct v3d_compile *c)
{
bool progress = false;
struct qinst **movs;
movs = ralloc_array(c, struct qinst *, c->num_temps);
if (!movs)
return false;
vir_for_each_block(block, c) {
/* The MOVs array tracks only available movs within the
* block.
*/
memset(movs, 0, sizeof(struct qinst *) * c->num_temps);
vir_for_each_inst(inst, block) {
progress = try_copy_prop(c, inst, movs) || progress;
apply_kills(c, movs, inst);
if (is_copy_mov(inst))
movs[inst->dst.index] = inst;
}
}
ralloc_free(movs);
return progress;
}

View File

@ -0,0 +1,162 @@
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file vir_opt_dead_code.c
*
* This is a simple dead code eliminator for SSA values in VIR.
*
* It walks all the instructions finding what temps are used, then walks again
* to remove instructions writing unused temps.
*
* This is an inefficient implementation if you have long chains of
* instructions where the entire chain is dead, but we expect those to have
* been eliminated at the NIR level, and here we're just cleaning up small
* problems produced by NIR->VIR.
*/
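/* For example (illustrative): once copy propagation has rewritten every
* read of t1, the leftover "mov t1, t0" writes a temp nobody reads and
* gets removed here.
*/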
#include "v3d_compiler.h"
static bool debug;
static void
dce(struct v3d_compile *c, struct qinst *inst)
{
if (debug) {
fprintf(stderr, "Removing: ");
vir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
assert(inst->qpu.flags.apf == V3D_QPU_PF_NONE);
assert(inst->qpu.flags.mpf == V3D_QPU_PF_NONE);
vir_remove_instruction(c, inst);
}
static bool
has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
{
for (int i = 0; i < vir_get_nsrc(inst); i++) {
if (inst->src[i].file == QFILE_VPM) {
/* Instance ID, Vertex ID: Should have been removed at
* the NIR level
*/
if (inst->src[i].index == ~0)
return true;
uint32_t attr = inst->src[i].index / 4;
uint32_t offset = inst->src[i].index % 4;
if (c->vattr_sizes[attr] != offset)
return true;
/* Can't get rid of the last VPM read, or the
* simulator (at least) throws an error.
*/
uint32_t total_size = 0;
for (uint32_t i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++)
total_size += c->vattr_sizes[i];
if (total_size == 1)
return true;
}
/* Dead code removal of varyings is tricky, so just assert
* that it all happened at the NIR level.
*/
if (inst->src[i].file == QFILE_VARY)
return true;
}
return false;
}
bool
vir_opt_dead_code(struct v3d_compile *c)
{
bool progress = false;
bool *used = calloc(c->num_temps, sizeof(bool));
vir_for_each_inst_inorder(inst, c) {
for (int i = 0; i < vir_get_nsrc(inst); i++) {
if (inst->src[i].file == QFILE_TEMP)
used[inst->src[i].index] = true;
}
}
vir_for_each_block(block, c) {
vir_for_each_inst_safe(inst, block) {
if (inst->dst.file != QFILE_NULL &&
!(inst->dst.file == QFILE_TEMP &&
!used[inst->dst.index])) {
continue;
}
if (vir_has_side_effects(c, inst))
continue;
if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
inst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
has_nonremovable_reads(c, inst)) {
/* If we can't remove the instruction, but we
* don't need its destination value, just
* remove the destination. The register
* allocator would trivially color it and it
* wouldn't cause any register pressure, but
* it's nicer to read the VIR code without
* unused destination regs.
*/
if (inst->dst.file == QFILE_TEMP) {
if (debug) {
fprintf(stderr,
"Removing dst from: ");
vir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
c->defs[inst->dst.index] = NULL;
inst->dst.file = QFILE_NULL;
progress = true;
}
continue;
}
for (int i = 0; i < vir_get_nsrc(inst); i++) {
if (inst->src[i].file != QFILE_VPM)
continue;
uint32_t attr = inst->src[i].index / 4;
uint32_t offset = (inst->src[i].index % 4);
if (c->vattr_sizes[attr] == offset) {
c->num_inputs--;
c->vattr_sizes[attr]--;
}
}
dce(c, inst);
progress = true;
continue;
}
}
free(used);
return progress;
}

View File

@ -0,0 +1,254 @@
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "util/ralloc.h"
#include "util/register_allocate.h"
#include "v3d_compiler.h"
#define QPU_R(i) { .magic = false, .index = i }
#define ACC_INDEX 0
#define ACC_COUNT 5
#define PHYS_INDEX (ACC_INDEX + ACC_COUNT)
#define PHYS_COUNT 64
bool
vir_init_reg_sets(struct v3d_compiler *compiler)
{
compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
true);
if (!compiler->regs)
return false;
/* Allocate 3 regfile classes, for the ways the physical register file
* can be divided up for fragment shader threading.
*/
for (int threads = 0; threads < 3; threads++) {
compiler->reg_class[threads] =
ra_alloc_reg_class(compiler->regs);
for (int i = PHYS_INDEX;
i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
ra_class_add_reg(compiler->regs,
compiler->reg_class[threads], i);
}
for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT; i++) {
ra_class_add_reg(compiler->regs,
compiler->reg_class[threads], i);
}
}
ra_set_finalize(compiler->regs, NULL);
return true;
}
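/* A reading of the loop above: every class gets all five accumulators,
* while the phys-file share shrinks from 64 registers (reg_class[0]) to
* 32 (reg_class[1]) to 16 (reg_class[2]), presumably matching the 1-, 2-
* and 4-thread fragment shader configurations splitting the register
* file.
*/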
struct node_to_temp_map {
uint32_t temp;
uint32_t priority;
};
static int
node_to_temp_priority(const void *in_a, const void *in_b)
{
const struct node_to_temp_map *a = in_a;
const struct node_to_temp_map *b = in_b;
return a->priority - b->priority;
}
#define CLASS_BIT_PHYS (1 << 0)
#define CLASS_BIT_R0_R2 (1 << 1)
#define CLASS_BIT_R3 (1 << 2)
#define CLASS_BIT_R4 (1 << 3)
/**
* Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
*
* The return value should be freed by the caller.
*/
struct qpu_reg *
v3d_register_allocate(struct v3d_compile *c)
{
struct node_to_temp_map map[c->num_temps];
uint32_t temp_to_node[c->num_temps];
uint8_t class_bits[c->num_temps];
struct qpu_reg *temp_registers = calloc(c->num_temps,
sizeof(*temp_registers));
int acc_nodes[ACC_COUNT];
struct ra_graph *g = ra_alloc_interference_graph(c->compiler->regs,
c->num_temps +
ARRAY_SIZE(acc_nodes));
/* Make some fixed nodes for the accumulators, which we will need to
* interfere with when ops have implied r3/r4 writes or for the thread
* switches. We could represent these as classes for the nodes to
* live in, but the classes take up a lot of memory to set up, so we
* don't want to make too many.
*/
for (int i = 0; i < ARRAY_SIZE(acc_nodes); i++) {
acc_nodes[i] = c->num_temps + i;
ra_set_node_reg(g, acc_nodes[i], ACC_INDEX + i);
}
/* Compute the live ranges so we can figure out interference. */
vir_calculate_live_intervals(c);
for (uint32_t i = 0; i < c->num_temps; i++) {
map[i].temp = i;
map[i].priority = c->temp_end[i] - c->temp_start[i];
}
qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority);
for (uint32_t i = 0; i < c->num_temps; i++) {
temp_to_node[map[i].temp] = i;
}
/* Figure out our register classes and preallocated registers. We
* start with any temp being able to be in any file, then instructions
* incrementally remove bits that the temp definitely can't be in.
*/
memset(class_bits,
CLASS_BIT_PHYS | CLASS_BIT_R0_R2 | CLASS_BIT_R3 | CLASS_BIT_R4,
sizeof(class_bits));
int ip = 0;
vir_for_each_inst_inorder(inst, c) {
/* If the instruction writes r3/r4 (and optionally moves its
* result to a temp), nothing else can be stored in r3/r4 across
* it.
*/
if (vir_writes_r3(inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {
ra_add_node_interference(g,
temp_to_node[i],
acc_nodes[3]);
}
}
}
if (vir_writes_r4(inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {
ra_add_node_interference(g,
temp_to_node[i],
acc_nodes[4]);
}
}
}
if (inst->src[0].file == QFILE_REG) {
switch (inst->src[0].index) {
case 0:
case 1:
case 2:
/* Payload setup instructions: Force allocate
* the dst to the given register (so the MOV
* will disappear).
*/
assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV);
assert(inst->dst.file == QFILE_TEMP);
ra_set_node_reg(g,
temp_to_node[inst->dst.index],
PHYS_INDEX +
inst->src[0].index);
break;
}
}
#if 0
switch (inst->op) {
case QOP_THRSW:
/* All accumulators are invalidated across a thread
* switch.
*/
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip && c->temp_end[i] > ip)
class_bits[i] &= ~(CLASS_BIT_R0_R2 |
CLASS_BIT_R3 |
CLASS_BIT_R4);
}
break;
default:
break;
}
#endif
ip++;
}
for (uint32_t i = 0; i < c->num_temps; i++) {
ra_set_node_class(g, temp_to_node[i],
c->compiler->reg_class[c->fs_threaded]);
}
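/* Two temps may share a register only if their live ranges don't overlap;
* ranges are disjoint exactly when one starts at or after the other's
* end, which is the condition tested below.
*/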
for (uint32_t i = 0; i < c->num_temps; i++) {
for (uint32_t j = i + 1; j < c->num_temps; j++) {
if (!(c->temp_start[i] >= c->temp_end[j] ||
c->temp_start[j] >= c->temp_end[i])) {
ra_add_node_interference(g,
temp_to_node[i],
temp_to_node[j]);
}
}
}
bool ok = ra_allocate(g);
if (!ok) {
if (!c->fs_threaded) {
fprintf(stderr, "Failed to register allocate:\n");
vir_dump(c);
}
c->failed = true;
free(temp_registers);
return NULL;
}
for (uint32_t i = 0; i < c->num_temps; i++) {
int ra_reg = ra_get_node_reg(g, temp_to_node[i]);
if (ra_reg < PHYS_INDEX) {
temp_registers[i].magic = true;
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
ra_reg - ACC_INDEX);
} else {
temp_registers[i].magic = false;
temp_registers[i].index = ra_reg - PHYS_INDEX;
}
/* If the value's never used, just write to the NOP register
* for clarity in debug output.
*/
if (c->temp_start[i] == c->temp_end[i]) {
temp_registers[i].magic = true;
temp_registers[i].index = V3D_QPU_WADDR_NOP;
}
}
ralloc_free(g);
return temp_registers;
}

View File

@ -0,0 +1,359 @@
/*
* Copyright © 2016 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"
static inline struct qpu_reg
qpu_reg(int index)
{
struct qpu_reg reg = {
.magic = false,
.index = index,
};
return reg;
}
static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
struct qpu_reg reg = {
.magic = true,
.index = waddr,
};
return reg;
}
static inline struct qpu_reg
qpu_acc(int acc)
{
return qpu_magic(V3D_QPU_WADDR_R0 + acc);
}
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
struct v3d_qpu_instr instr = {
.type = V3D_QPU_INSTR_TYPE_ALU,
.alu = {
.add = {
.op = V3D_QPU_A_NOP,
.waddr = V3D_QPU_WADDR_NOP,
.magic_write = true,
},
.mul = {
.op = V3D_QPU_M_NOP,
.waddr = V3D_QPU_WADDR_NOP,
.magic_write = true,
},
}
};
return instr;
}
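/* Both halves have to be spelled out because a V3D ALU instruction always
* encodes an add-pipe op and a mul-pipe op together; "no-op" means NOP on
* each pipe with a magic write to the NOP address so nothing is clobbered.
*/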
static struct qinst *
vir_nop(void)
{
struct qreg undef = { QFILE_NULL, 0 };
struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
return qinst;
}
static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
struct qinst *q = vir_nop();
list_addtail(&q->link, &inst->link);
return q;
}
static void
new_ldunif_instr(struct qinst *inst, int i)
{
struct qinst *ldunif = new_qpu_nop_before(inst);
ldunif->qpu.sig.ldunif = true;
assert(inst->src[i].file == QFILE_UNIF);
ldunif->uniform = inst->src[i].index;
}
/**
* Allocates the src register (accumulator or register file) into the RADDR
* fields of the instruction.
*/
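/* For example (illustrative): in "add rf3, rf1, rf2", the first source
* claims raddr_a and mux A, the second then takes raddr_b and mux B;
* accumulator sources map straight to the R0-R5 muxes and consume no
* raddr slot at all.
*/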
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
if (src.magic) {
assert(src.index >= V3D_QPU_WADDR_R0 &&
src.index <= V3D_QPU_WADDR_R5);
*mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
return;
}
if (instr->alu.add.a != V3D_QPU_MUX_A &&
instr->alu.add.b != V3D_QPU_MUX_A &&
instr->alu.mul.a != V3D_QPU_MUX_A &&
instr->alu.mul.b != V3D_QPU_MUX_A) {
instr->raddr_a = src.index;
*mux = V3D_QPU_MUX_A;
} else {
if (instr->raddr_a == src.index) {
*mux = V3D_QPU_MUX_A;
} else {
assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
instr->alu.add.b == V3D_QPU_MUX_B &&
instr->alu.mul.a == V3D_QPU_MUX_B &&
instr->alu.mul.b == V3D_QPU_MUX_B) ||
src.index == instr->raddr_b);
instr->raddr_b = src.index;
*mux = V3D_QPU_MUX_B;
}
}
}
static void
v3d_generate_code_block(struct v3d_compile *c,
struct qblock *block,
struct qpu_reg *temp_registers)
{
int last_vpm_read_index = -1;
vir_for_each_inst(qinst, block) {
#if 0
fprintf(stderr, "translating qinst to qpu: ");
vir_dump_inst(c, qinst);
fprintf(stderr, "\n");
#endif
struct qinst *temp;
if (vir_has_implicit_uniform(qinst)) {
int src = vir_get_implicit_uniform_src(qinst);
assert(qinst->src[src].file == QFILE_UNIF);
qinst->uniform = qinst->src[src].index;
c->num_uniforms++;
}
int nsrc = vir_get_non_sideband_nsrc(qinst);
struct qpu_reg src[ARRAY_SIZE(qinst->src)];
bool emitted_ldunif = false;
for (int i = 0; i < nsrc; i++) {
int index = qinst->src[i].index;
switch (qinst->src[i].file) {
case QFILE_REG:
src[i] = qpu_reg(qinst->src[i].index);
break;
case QFILE_MAGIC:
src[i] = qpu_magic(qinst->src[i].index);
break;
case QFILE_NULL:
case QFILE_LOAD_IMM:
src[i] = qpu_acc(0);
break;
case QFILE_TEMP:
src[i] = temp_registers[index];
break;
case QFILE_UNIF:
if (!emitted_ldunif) {
new_ldunif_instr(qinst, i);
c->num_uniforms++;
emitted_ldunif = true;
}
src[i] = qpu_acc(5);
break;
case QFILE_VARY:
temp = new_qpu_nop_before(qinst);
temp->qpu.sig.ldvary = true;
src[i] = qpu_acc(3);
break;
case QFILE_SMALL_IMM:
abort(); /* XXX */
#if 0
src[i].mux = QPU_MUX_SMALL_IMM;
src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
/* This should only have returned a valid
* small immediate field, not ~0 for failure.
*/
assert(src[i].addr <= 47);
#endif
break;
case QFILE_VPM:
assert((int)qinst->src[i].index >=
last_vpm_read_index);
(void)last_vpm_read_index;
last_vpm_read_index = qinst->src[i].index;
temp = new_qpu_nop_before(qinst);
temp->qpu.sig.ldvpm = true;
src[i] = qpu_acc(3);
break;
case QFILE_TLB:
case QFILE_TLBU:
unreachable("bad vir src file");
}
}
struct qpu_reg dst;
switch (qinst->dst.file) {
case QFILE_NULL:
dst = qpu_magic(V3D_QPU_WADDR_NOP);
break;
case QFILE_REG:
dst = qpu_reg(qinst->dst.index);
break;
case QFILE_MAGIC:
dst = qpu_magic(qinst->dst.index);
break;
case QFILE_TEMP:
dst = temp_registers[qinst->dst.index];
break;
case QFILE_VPM:
dst = qpu_magic(V3D_QPU_WADDR_VPM);
break;
case QFILE_TLB:
dst = qpu_magic(V3D_QPU_WADDR_TLB);
break;
case QFILE_TLBU:
dst = qpu_magic(V3D_QPU_WADDR_TLBU);
break;
case QFILE_VARY:
case QFILE_UNIF:
case QFILE_SMALL_IMM:
case QFILE_LOAD_IMM:
assert(!"not reached");
break;
}
if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
if (nsrc >= 1) {
set_src(&qinst->qpu,
&qinst->qpu.alu.add.a, src[0]);
}
if (nsrc >= 2) {
set_src(&qinst->qpu,
&qinst->qpu.alu.add.b, src[1]);
}
qinst->qpu.alu.add.waddr = dst.index;
qinst->qpu.alu.add.magic_write = dst.magic;
} else {
if (nsrc >= 1) {
set_src(&qinst->qpu,
&qinst->qpu.alu.mul.a, src[0]);
}
if (nsrc >= 2) {
set_src(&qinst->qpu,
&qinst->qpu.alu.mul.b, src[1]);
}
qinst->qpu.alu.mul.waddr = dst.index;
qinst->qpu.alu.mul.magic_write = dst.magic;
}
} else {
assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
}
}
}
static void
v3d_dump_qpu(struct v3d_compile *c)
{
fprintf(stderr, "%s prog %d/%d QPU:\n",
vir_get_stage_name(c),
c->program_id, c->variant_id);
for (int i = 0; i < c->qpu_inst_count; i++) {
const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
}
fprintf(stderr, "\n");
}
void
v3d_vir_to_qpu(struct v3d_compile *c)
{
struct qpu_reg *temp_registers = v3d_register_allocate(c);
struct qblock *end_block = list_last_entry(&c->blocks,
struct qblock, link);
/* Reset the uniform count to how many will be actually loaded by the
* generated QPU code.
*/
c->num_uniforms = 0;
vir_for_each_block(block, c)
v3d_generate_code_block(c, block, temp_registers);
struct qinst *thrsw = vir_nop();
list_addtail(&thrsw->link, &end_block->instructions);
thrsw->qpu.sig.thrsw = true;
uint32_t cycles = v3d_qpu_schedule_instructions(c);
c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
int i = 0;
vir_for_each_inst_inorder(inst, c) {
bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
&c->qpu_insts[i++]);
assert(ok); (void) ok;
}
assert(i == c->qpu_inst_count);
if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
vir_get_stage_name(c),
c->program_id, c->variant_id,
cycles);
}
if (V3D_DEBUG & (V3D_DEBUG_QPU |
v3d_debug_flag_for_shader_stage(c->s->stage))) {
v3d_dump_qpu(c);
}
qpu_validate(c);
free(temp_registers);
}