broadcom: Add VC5 NIR compiler.
This is a pretty straightforward fork of VC4's NIR compiler to VC5. The condition codes, registers, and I/O have all changed, making the backend hard to share, though their heritage is still recognizable. v2: Move to src/broadcom/compiler to match intel's layout, rename more "vc5" to "v3d", rename QIR to VIR ("V3D IR") to avoid symbol conflicts with vc4, use new v3d_debug header, add compiler init/free functions, do texture swizzling in NIR to allow optimization.
This commit is contained in:
parent
f71364f297
commit
ade416d023
|
@ -26,6 +26,8 @@ AM_CPPFLAGS = \
|
|||
-I$(top_srcdir)/src \
|
||||
-I$(top_srcdir)/src/broadcom/ \
|
||||
-I$(top_srcdir)/src/broadcom/include \
|
||||
-I$(top_srcdir)/src/gallium/auxiliary \
|
||||
-I$(top_srcdir)/src/gallium/include \
|
||||
$(VALGRIND_CFLAGS) \
|
||||
$(DEFINES)
|
||||
|
||||
|
|
|
@ -16,6 +16,19 @@ BROADCOM_FILES = \
|
|||
clif/clif_dump.c \
|
||||
clif/clif_dump.h \
|
||||
common/v3d_device_info.h \
|
||||
compiler/nir_to_vir.c \
|
||||
compiler/vir.c \
|
||||
compiler/vir_dump.c \
|
||||
compiler/vir_live_variables.c \
|
||||
compiler/vir_lower_uniforms.c \
|
||||
compiler/vir_opt_copy_propagate.c \
|
||||
compiler/vir_opt_dead_code.c \
|
||||
compiler/vir_register_allocate.c \
|
||||
compiler/vir_to_qpu.c \
|
||||
compiler/qpu_schedule.c \
|
||||
compiler/qpu_validate.c \
|
||||
compiler/v3d_compiler.h \
|
||||
compiler/v3d_nir_lower_io.c \
|
||||
qpu/qpu_disasm.c \
|
||||
qpu/qpu_disasm.h \
|
||||
qpu/qpu_instr.c \
|
||||
|
|
|
@ -13,6 +13,7 @@ check_PROGRAMS += \
|
|||
|
||||
LDADD = \
|
||||
libbroadcom.la \
|
||||
$(top_builddir)/src/compiler/nir/libnir.la \
|
||||
$(top_builddir)/src/util/libmesautil.la \
|
||||
$(NULL)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,208 @@
|
|||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Validates the QPU instruction sequence after register allocation and
|
||||
* scheduling.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "v3d_compiler.h"
|
||||
#include "qpu/qpu_disasm.h"
|
||||
|
||||
struct v3d_qpu_validate_state {
|
||||
struct v3d_compile *c;
|
||||
const struct v3d_qpu_instr *last;
|
||||
int ip;
|
||||
int last_sfu_write;
|
||||
};
|
||||
|
||||
static void
|
||||
fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
|
||||
{
|
||||
struct v3d_compile *c = state->c;
|
||||
|
||||
fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
|
||||
|
||||
int dump_ip = 0;
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
v3d_qpu_dump(c->devinfo, &inst->qpu);
|
||||
|
||||
if (dump_ip++ == state->ip)
|
||||
fprintf(stderr, " *** ERROR ***");
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
static bool
|
||||
qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
|
||||
bool (*predicate)(enum v3d_qpu_waddr waddr))
|
||||
{
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
|
||||
return false;
|
||||
|
||||
if (inst->alu.add.op != V3D_QPU_A_NOP &&
|
||||
inst->alu.add.magic_write &&
|
||||
predicate(inst->alu.add.waddr))
|
||||
return true;
|
||||
|
||||
if (inst->alu.mul.op != V3D_QPU_M_NOP &&
|
||||
inst->alu.mul.magic_write &&
|
||||
predicate(inst->alu.mul.waddr))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
{
|
||||
const struct v3d_qpu_instr *inst = &qinst->qpu;
|
||||
|
||||
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
return;
|
||||
|
||||
/* LDVARY writes r5 two instructions later and LDUNIF writes
|
||||
* r5 one instruction later, which is illegal to have
|
||||
* together.
|
||||
*/
|
||||
if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
|
||||
fail_instr(state, "LDUNIF after a LDVARY");
|
||||
}
|
||||
|
||||
int tmu_writes = 0;
|
||||
int sfu_writes = 0;
|
||||
int vpm_writes = 0;
|
||||
int tlb_writes = 0;
|
||||
int tsy_writes = 0;
|
||||
|
||||
if (inst->alu.add.op != V3D_QPU_A_NOP) {
|
||||
if (inst->alu.add.magic_write) {
|
||||
if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr))
|
||||
tmu_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
|
||||
sfu_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
|
||||
vpm_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
|
||||
tlb_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
|
||||
tsy_writes++;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->alu.mul.op != V3D_QPU_M_NOP) {
|
||||
if (inst->alu.mul.magic_write) {
|
||||
if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))
|
||||
tmu_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
|
||||
sfu_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
|
||||
vpm_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
|
||||
tlb_writes++;
|
||||
if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
|
||||
tsy_writes++;
|
||||
}
|
||||
}
|
||||
|
||||
(void)qpu_magic_waddr_matches; /* XXX */
|
||||
|
||||
/* SFU r4 results come back two instructions later. No doing
|
||||
* r4 read/writes or other SFU lookups until it's done.
|
||||
*/
|
||||
if (state->ip - state->last_sfu_write < 2) {
|
||||
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
|
||||
fail_instr(state, "R4 read too soon after SFU");
|
||||
|
||||
if (v3d_qpu_writes_r4(inst))
|
||||
fail_instr(state, "R4 write too soon after SFU");
|
||||
|
||||
if (sfu_writes)
|
||||
fail_instr(state, "SFU write too soon after SFU");
|
||||
}
|
||||
|
||||
/* XXX: The docs say VPM can happen with the others, but the simulator
|
||||
* disagrees.
|
||||
*/
|
||||
if (tmu_writes +
|
||||
sfu_writes +
|
||||
vpm_writes +
|
||||
tlb_writes +
|
||||
tsy_writes +
|
||||
inst->sig.ldtmu +
|
||||
inst->sig.ldtlb +
|
||||
inst->sig.ldvpm +
|
||||
inst->sig.ldtlbu > 1) {
|
||||
fail_instr(state,
|
||||
"Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
|
||||
}
|
||||
|
||||
if (sfu_writes)
|
||||
state->last_sfu_write = state->ip;
|
||||
}
|
||||
|
||||
static void
|
||||
qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
|
||||
{
|
||||
vir_for_each_inst(qinst, block) {
|
||||
qpu_validate_inst(state, qinst);
|
||||
|
||||
state->last = &qinst->qpu;
|
||||
state->ip++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for the instruction restrictions from page 37 ("Summary of
|
||||
* Instruction Restrictions").
|
||||
*/
|
||||
void
|
||||
qpu_validate(struct v3d_compile *c)
|
||||
{
|
||||
/* We don't want to do validation in release builds, but we want to
|
||||
* keep compiling the validation code to make sure it doesn't get
|
||||
* broken.
|
||||
*/
|
||||
#ifndef DEBUG
|
||||
return;
|
||||
#endif
|
||||
|
||||
struct v3d_qpu_validate_state state = {
|
||||
.c = c,
|
||||
.last_sfu_write = -10,
|
||||
.ip = 0,
|
||||
};
|
||||
|
||||
vir_for_each_block(block, c) {
|
||||
qpu_validate_block(&state, block);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright © 2016 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
struct v3d_compiler *
|
||||
v3d_compiler_init(void)
|
||||
{
|
||||
struct v3d_compile *c = rzalloc(struct v3d_compile);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
void
|
||||
v3d_add_qpu_inst(struct v3d_compiler *c, uint64_t inst)
|
||||
{
|
||||
if (c->qpu_inst_count >= c->qpu_inst_size) {
|
||||
c->qpu_inst_size = MAX2(c->qpu_inst_size * 2, 16);
|
||||
c->qpu_insts = reralloc(c, c->qpu_insts, uint64_t,
|
||||
c->qpu_inst_size_array_size);
|
||||
|
||||
}
|
||||
|
||||
c->qpu_insts[c->qpu_inst_count++] = inst;
|
||||
}
|
|
@ -0,0 +1,927 @@
|
|||
/*
|
||||
* Copyright © 2016 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef V3D_COMPILER_H
|
||||
#define V3D_COMPILER_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "common/v3d_debug.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "util/list.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "qpu/qpu_instr.h"
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
#define V3D_MAX_TEXTURE_SAMPLERS 32
|
||||
#define V3D_MAX_SAMPLES 4
|
||||
#define V3D_MAX_FS_INPUTS 64
|
||||
#define V3D_MAX_VS_INPUTS 64
|
||||
|
||||
struct nir_builder;
|
||||
|
||||
struct v3d_fs_inputs {
|
||||
/**
|
||||
* Array of the meanings of the VPM inputs this shader needs.
|
||||
*
|
||||
* It doesn't include those that aren't part of the VPM, like
|
||||
* point/line coordinates.
|
||||
*/
|
||||
struct v3d_varying_slot *input_slots;
|
||||
uint32_t num_inputs;
|
||||
};
|
||||
|
||||
enum qfile {
|
||||
/** An unused source or destination register. */
|
||||
QFILE_NULL,
|
||||
|
||||
/** A physical register, such as the W coordinate payload. */
|
||||
QFILE_REG,
|
||||
/** One of the registers for fixed function interactions. */
|
||||
QFILE_MAGIC,
|
||||
|
||||
/**
|
||||
* A virtual register, that will be allocated to actual accumulator
|
||||
* or physical registers later.
|
||||
*/
|
||||
QFILE_TEMP,
|
||||
QFILE_VARY,
|
||||
QFILE_UNIF,
|
||||
QFILE_TLB,
|
||||
QFILE_TLBU,
|
||||
|
||||
/**
|
||||
* VPM reads use this with an index value to say what part of the VPM
|
||||
* is being read.
|
||||
*/
|
||||
QFILE_VPM,
|
||||
|
||||
/**
|
||||
* Stores an immediate value in the index field that will be used
|
||||
* directly by qpu_load_imm().
|
||||
*/
|
||||
QFILE_LOAD_IMM,
|
||||
|
||||
/**
|
||||
* Stores an immediate value in the index field that can be turned
|
||||
* into a small immediate field by qpu_encode_small_immediate().
|
||||
*/
|
||||
QFILE_SMALL_IMM,
|
||||
};
|
||||
|
||||
/**
|
||||
* A reference to a QPU register or a virtual temp register.
|
||||
*/
|
||||
struct qreg {
|
||||
enum qfile file;
|
||||
uint32_t index;
|
||||
};
|
||||
|
||||
static inline struct qreg vir_reg(enum qfile file, uint32_t index)
|
||||
{
|
||||
return (struct qreg){file, index};
|
||||
}
|
||||
|
||||
/**
|
||||
* A reference to an actual register at the QPU level, for register
|
||||
* allocation.
|
||||
*/
|
||||
struct qpu_reg {
|
||||
bool magic;
|
||||
int index;
|
||||
};
|
||||
|
||||
struct qinst {
|
||||
/** Entry in qblock->instructions */
|
||||
struct list_head link;
|
||||
|
||||
/**
|
||||
* The instruction being wrapped. Its condition codes, pack flags,
|
||||
* signals, etc. will all be used, with just the register references
|
||||
* being replaced by the contents of qinst->dst and qinst->src[].
|
||||
*/
|
||||
struct v3d_qpu_instr qpu;
|
||||
|
||||
/* Pre-register-allocation references to src/dst registers */
|
||||
struct qreg dst;
|
||||
struct qreg src[3];
|
||||
bool cond_is_exec_mask;
|
||||
bool has_implicit_uniform;
|
||||
|
||||
/* After vir_to_qpu.c: If instr reads a uniform, which uniform from
|
||||
* the uncompiled stream it is.
|
||||
*/
|
||||
int uniform;
|
||||
};
|
||||
|
||||
enum quniform_contents {
|
||||
/**
|
||||
* Indicates that a constant 32-bit value is copied from the program's
|
||||
* uniform contents.
|
||||
*/
|
||||
QUNIFORM_CONSTANT,
|
||||
/**
|
||||
* Indicates that the program's uniform contents are used as an index
|
||||
* into the GL uniform storage.
|
||||
*/
|
||||
QUNIFORM_UNIFORM,
|
||||
|
||||
/** @{
|
||||
* Scaling factors from clip coordinates to relative to the viewport
|
||||
* center.
|
||||
*
|
||||
* This is used by the coordinate and vertex shaders to produce the
|
||||
* 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
|
||||
* point offsets from the viewport center.
|
||||
*/
|
||||
QUNIFORM_VIEWPORT_X_SCALE,
|
||||
QUNIFORM_VIEWPORT_Y_SCALE,
|
||||
/** @} */
|
||||
|
||||
QUNIFORM_VIEWPORT_Z_OFFSET,
|
||||
QUNIFORM_VIEWPORT_Z_SCALE,
|
||||
|
||||
QUNIFORM_USER_CLIP_PLANE,
|
||||
|
||||
/**
|
||||
* A reference to a texture config parameter 0 uniform.
|
||||
*
|
||||
* This is a uniform implicitly loaded with a QPU_W_TMU* write, which
|
||||
* defines texture type, miplevels, and such. It will be found as a
|
||||
* parameter to the first QOP_TEX_[STRB] instruction in a sequence.
|
||||
*/
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_0,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_1,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_2,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_3,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_4,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_5,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_6,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_7,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_8,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_9,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_10,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_11,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_12,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_13,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_14,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_15,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_16,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_17,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_18,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_19,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_20,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_21,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_22,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_23,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_24,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_25,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_26,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_27,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_28,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_29,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_30,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_31,
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_32,
|
||||
|
||||
/**
|
||||
* A reference to a texture config parameter 1 uniform.
|
||||
*
|
||||
* This is a uniform implicitly loaded with a QPU_W_TMU* write, which
|
||||
* defines texture width, height, filters, and wrap modes. It will be
|
||||
* found as a parameter to the second QOP_TEX_[STRB] instruction in a
|
||||
* sequence.
|
||||
*/
|
||||
QUNIFORM_TEXTURE_CONFIG_P1,
|
||||
|
||||
QUNIFORM_TEXTURE_FIRST_LEVEL,
|
||||
|
||||
QUNIFORM_TEXTURE_WIDTH,
|
||||
QUNIFORM_TEXTURE_HEIGHT,
|
||||
QUNIFORM_TEXTURE_DEPTH,
|
||||
QUNIFORM_TEXTURE_ARRAY_SIZE,
|
||||
QUNIFORM_TEXTURE_LEVELS,
|
||||
|
||||
QUNIFORM_TEXTURE_MSAA_ADDR,
|
||||
|
||||
QUNIFORM_UBO_ADDR,
|
||||
|
||||
QUNIFORM_TEXRECT_SCALE_X,
|
||||
QUNIFORM_TEXRECT_SCALE_Y,
|
||||
|
||||
QUNIFORM_TEXTURE_BORDER_COLOR,
|
||||
|
||||
QUNIFORM_STENCIL,
|
||||
|
||||
QUNIFORM_ALPHA_REF,
|
||||
QUNIFORM_SAMPLE_MASK,
|
||||
};
|
||||
|
||||
struct v3d_varying_slot {
|
||||
uint8_t slot_and_component;
|
||||
};
|
||||
|
||||
static inline struct v3d_varying_slot
|
||||
v3d_slot_from_slot_and_component(uint8_t slot, uint8_t component)
|
||||
{
|
||||
assert(slot < 255 / 4);
|
||||
return (struct v3d_varying_slot){ (slot << 2) + component };
|
||||
}
|
||||
|
||||
/** Extracts the varying slot number from the packed encoding. */
static inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot)
{
        /* Upper 6 bits hold the slot number. */
        return (uint8_t)(slot.slot_and_component >> 2);
}
|
||||
|
||||
/** Extracts the component (x/y/z/w) from the packed encoding. */
static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot)
{
        /* Low 2 bits hold the component. */
        return (uint8_t)(slot.slot_and_component & 0x3);
}
|
||||
|
||||
struct v3d_ubo_range {
|
||||
/**
|
||||
* offset in bytes from the start of the ubo where this range is
|
||||
* uploaded.
|
||||
*
|
||||
* Only set once used is set.
|
||||
*/
|
||||
uint32_t dst_offset;
|
||||
|
||||
/**
|
||||
* offset in bytes from the start of the gallium uniforms where the
|
||||
* data comes from.
|
||||
*/
|
||||
uint32_t src_offset;
|
||||
|
||||
/** size in bytes of this ubo range */
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
struct v3d_key {
|
||||
void *shader_state;
|
||||
struct {
|
||||
uint8_t swizzle[4];
|
||||
uint8_t return_size;
|
||||
uint8_t return_channels;
|
||||
union {
|
||||
struct {
|
||||
unsigned compare_mode:1;
|
||||
unsigned compare_func:3;
|
||||
unsigned wrap_s:3;
|
||||
unsigned wrap_t:3;
|
||||
};
|
||||
struct {
|
||||
uint16_t msaa_width, msaa_height;
|
||||
};
|
||||
};
|
||||
} tex[V3D_MAX_TEXTURE_SAMPLERS];
|
||||
uint8_t ucp_enables;
|
||||
};
|
||||
|
||||
struct v3d_fs_key {
|
||||
struct v3d_key base;
|
||||
bool depth_enabled;
|
||||
bool is_points;
|
||||
bool is_lines;
|
||||
bool alpha_test;
|
||||
bool point_coord_upper_left;
|
||||
bool light_twoside;
|
||||
bool msaa;
|
||||
bool sample_coverage;
|
||||
bool sample_alpha_to_coverage;
|
||||
bool sample_alpha_to_one;
|
||||
bool clamp_color;
|
||||
bool swap_color_rb;
|
||||
uint8_t alpha_test_func;
|
||||
uint8_t logicop_func;
|
||||
uint32_t point_sprite_mask;
|
||||
|
||||
struct pipe_rt_blend_state blend;
|
||||
};
|
||||
|
||||
struct v3d_vs_key {
|
||||
struct v3d_key base;
|
||||
|
||||
struct v3d_varying_slot fs_inputs[V3D_MAX_FS_INPUTS];
|
||||
uint8_t num_fs_inputs;
|
||||
|
||||
bool is_coord;
|
||||
bool per_vertex_point_size;
|
||||
bool clamp_color;
|
||||
};
|
||||
|
||||
/** A basic block of VIR instructions. */
|
||||
struct qblock {
|
||||
struct list_head link;
|
||||
|
||||
struct list_head instructions;
|
||||
|
||||
struct set *predecessors;
|
||||
struct qblock *successors[2];
|
||||
|
||||
int index;
|
||||
|
||||
/* Instruction IPs for the first and last instruction of the block.
|
||||
* Set by qpu_schedule.c.
|
||||
*/
|
||||
uint32_t start_qpu_ip;
|
||||
uint32_t end_qpu_ip;
|
||||
|
||||
/* Instruction IP for the branch instruction of the block. Set by
|
||||
* qpu_schedule.c.
|
||||
*/
|
||||
uint32_t branch_qpu_ip;
|
||||
|
||||
/** Offset within the uniform stream at the start of the block. */
|
||||
uint32_t start_uniform;
|
||||
/** Offset within the uniform stream of the branch instruction */
|
||||
uint32_t branch_uniform;
|
||||
|
||||
/** @{ used by v3d_vir_live_variables.c */
|
||||
BITSET_WORD *def;
|
||||
BITSET_WORD *use;
|
||||
BITSET_WORD *live_in;
|
||||
BITSET_WORD *live_out;
|
||||
int start_ip, end_ip;
|
||||
/** @} */
|
||||
};
|
||||
|
||||
/**
|
||||
* Compiler state saved across compiler invocations, for any expensive global
|
||||
* setup.
|
||||
*/
|
||||
struct v3d_compiler {
|
||||
const struct v3d_device_info *devinfo;
|
||||
struct ra_regs *regs;
|
||||
unsigned int reg_class[3];
|
||||
};
|
||||
|
||||
struct v3d_compile {
|
||||
const struct v3d_device_info *devinfo;
|
||||
nir_shader *s;
|
||||
nir_function_impl *impl;
|
||||
struct exec_list *cf_node_list;
|
||||
const struct v3d_compiler *compiler;
|
||||
|
||||
/**
|
||||
* Mapping from nir_register * or nir_ssa_def * to array of struct
|
||||
* qreg for the values.
|
||||
*/
|
||||
struct hash_table *def_ht;
|
||||
|
||||
/* For each temp, the instruction generating its value. */
|
||||
struct qinst **defs;
|
||||
uint32_t defs_array_size;
|
||||
|
||||
/**
|
||||
* Inputs to the shader, arranged by TGSI declaration order.
|
||||
*
|
||||
* Not all fragment shader QFILE_VARY reads are present in this array.
|
||||
*/
|
||||
struct qreg *inputs;
|
||||
struct qreg *outputs;
|
||||
bool msaa_per_sample_output;
|
||||
struct qreg color_reads[V3D_MAX_SAMPLES];
|
||||
struct qreg sample_colors[V3D_MAX_SAMPLES];
|
||||
uint32_t inputs_array_size;
|
||||
uint32_t outputs_array_size;
|
||||
uint32_t uniforms_array_size;
|
||||
|
||||
/* Booleans for whether the corresponding QFILE_VARY[i] is
|
||||
* flat-shaded. This doesn't count gl_FragColor flat-shading, which is
|
||||
* controlled by shader->color_inputs and rasterizer->flatshade in the
|
||||
* gallium driver.
|
||||
*/
|
||||
BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
|
||||
|
||||
struct v3d_ubo_range *ubo_ranges;
|
||||
bool *ubo_range_used;
|
||||
uint32_t ubo_ranges_array_size;
|
||||
/** Number of uniform areas tracked in ubo_ranges. */
|
||||
uint32_t num_ubo_ranges;
|
||||
uint32_t next_ubo_dst_offset;
|
||||
|
||||
/* State for whether we're executing on each channel currently. 0 if
|
||||
* yes, otherwise a block number + 1 that the channel jumped to.
|
||||
*/
|
||||
struct qreg execute;
|
||||
|
||||
struct qreg line_x, point_x, point_y;
|
||||
|
||||
/**
|
||||
* Instance ID, which comes in before the vertex attribute payload if
|
||||
* the shader record requests it.
|
||||
*/
|
||||
struct qreg iid;
|
||||
|
||||
/**
|
||||
* Vertex ID, which comes in before the vertex attribute payload
|
||||
* (after Instance ID) if the shader record requests it.
|
||||
*/
|
||||
struct qreg vid;
|
||||
|
||||
/* Fragment shader payload regs. */
|
||||
struct qreg payload_w, payload_w_centroid, payload_z;
|
||||
|
||||
/** boolean (~0 -> true) if the fragment has been discarded. */
|
||||
struct qreg discard;
|
||||
|
||||
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
|
||||
uint32_t num_vpm_writes;
|
||||
|
||||
/**
|
||||
* Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
|
||||
*
|
||||
* This includes those that aren't part of the VPM varyings, like
|
||||
* point/line coordinates.
|
||||
*/
|
||||
struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
|
||||
|
||||
/**
|
||||
* An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
|
||||
* of the output is. Used to emit from the VS in the order that the
|
||||
* FS needs.
|
||||
*/
|
||||
struct v3d_varying_slot *output_slots;
|
||||
|
||||
struct pipe_shader_state *shader_state;
|
||||
struct v3d_key *key;
|
||||
struct v3d_fs_key *fs_key;
|
||||
struct v3d_vs_key *vs_key;
|
||||
|
||||
/* Live ranges of temps. */
|
||||
int *temp_start, *temp_end;
|
||||
|
||||
uint32_t *uniform_data;
|
||||
enum quniform_contents *uniform_contents;
|
||||
uint32_t uniform_array_size;
|
||||
uint32_t num_uniforms;
|
||||
uint32_t num_outputs;
|
||||
uint32_t output_position_index;
|
||||
nir_variable *output_color_var;
|
||||
uint32_t output_point_size_index;
|
||||
uint32_t output_sample_mask_index;
|
||||
|
||||
struct qreg undef;
|
||||
uint32_t num_temps;
|
||||
|
||||
struct list_head blocks;
|
||||
int next_block_index;
|
||||
struct qblock *cur_block;
|
||||
struct qblock *loop_cont_block;
|
||||
struct qblock *loop_break_block;
|
||||
|
||||
uint64_t *qpu_insts;
|
||||
uint32_t qpu_inst_count;
|
||||
uint32_t qpu_inst_size;
|
||||
|
||||
/* For the FS, the number of varying inputs not counting the
|
||||
* point/line varyings payload
|
||||
*/
|
||||
uint32_t num_inputs;
|
||||
|
||||
/**
|
||||
* Number of inputs from num_inputs remaining to be queued to the read
|
||||
* FIFO in the VS/CS.
|
||||
*/
|
||||
uint32_t num_inputs_remaining;
|
||||
|
||||
/* Number of inputs currently in the read FIFO for the VS/CS */
|
||||
uint32_t num_inputs_in_fifo;
|
||||
|
||||
/** Next offset in the VPM to read from in the VS/CS */
|
||||
uint32_t vpm_read_offset;
|
||||
|
||||
uint32_t program_id;
|
||||
uint32_t variant_id;
|
||||
|
||||
/* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH
|
||||
* is used to hide texturing latency at the cost of limiting ourselves
|
||||
* to the bottom half of physical reg space.
|
||||
*/
|
||||
bool fs_threaded;
|
||||
|
||||
bool last_thrsw_at_top_level;
|
||||
|
||||
bool failed;
|
||||
};
|
||||
|
||||
struct v3d_uniform_list {
|
||||
enum quniform_contents *contents;
|
||||
uint32_t *data;
|
||||
uint32_t count;
|
||||
};
|
||||
|
||||
struct v3d_prog_data {
|
||||
struct v3d_uniform_list uniforms;
|
||||
|
||||
struct v3d_ubo_range *ubo_ranges;
|
||||
uint32_t num_ubo_ranges;
|
||||
uint32_t ubo_size;
|
||||
|
||||
uint8_t num_inputs;
|
||||
|
||||
};
|
||||
|
||||
struct v3d_vs_prog_data {
|
||||
struct v3d_prog_data base;
|
||||
|
||||
bool uses_iid, uses_vid;
|
||||
|
||||
/* Number of components read from each vertex attribute. */
|
||||
uint8_t vattr_sizes[32];
|
||||
|
||||
/* Total number of components read, for the shader state record. */
|
||||
uint32_t vpm_input_size;
|
||||
|
||||
/* Total number of components written, for the shader state record. */
|
||||
uint32_t vpm_output_size;
|
||||
};
|
||||
|
||||
struct v3d_fs_prog_data {
|
||||
struct v3d_prog_data base;
|
||||
|
||||
struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
|
||||
|
||||
/** bitmask of which inputs are color inputs, for flat shade handling. */
|
||||
uint32_t color_inputs[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
|
||||
|
||||
/* Bitmask for whether the corresponding input is flat-shaded,
|
||||
* independent of rasterizer (gl_FragColor) flat-shading.
|
||||
*/
|
||||
BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
|
||||
|
||||
bool writes_z;
|
||||
};
|
||||
|
||||
/* Special nir_load_input intrinsic index for loading the current TLB
|
||||
* destination color.
|
||||
*/
|
||||
#define V3D_NIR_TLB_COLOR_READ_INPUT 2000000000
|
||||
|
||||
#define V3D_NIR_MS_MASK_OUTPUT 2000000000
|
||||
|
||||
extern const nir_shader_compiler_options v3d_nir_options;
|
||||
|
||||
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
|
||||
void v3d_compiler_free(const struct v3d_compiler *compiler);
|
||||
void v3d_optimize_nir(struct nir_shader *s);
|
||||
|
||||
uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
|
||||
struct v3d_vs_key *key,
|
||||
struct v3d_vs_prog_data *prog_data,
|
||||
nir_shader *s,
|
||||
int program_id, int variant_id,
|
||||
uint32_t *final_assembly_size);
|
||||
|
||||
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
|
||||
struct v3d_fs_key *key,
|
||||
struct v3d_fs_prog_data *prog_data,
|
||||
nir_shader *s,
|
||||
int program_id, int variant_id,
|
||||
uint32_t *final_assembly_size);
|
||||
|
||||
void v3d_nir_to_vir(struct v3d_compile *c);
|
||||
|
||||
void vir_compile_destroy(struct v3d_compile *c);
|
||||
const char *vir_get_stage_name(struct v3d_compile *c);
|
||||
struct qblock *vir_new_block(struct v3d_compile *c);
|
||||
void vir_set_emit_block(struct v3d_compile *c, struct qblock *block);
|
||||
void vir_link_blocks(struct qblock *predecessor, struct qblock *successor);
|
||||
struct qblock *vir_entry_block(struct v3d_compile *c);
|
||||
struct qblock *vir_exit_block(struct v3d_compile *c);
|
||||
struct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst,
|
||||
struct qreg src0, struct qreg src1);
|
||||
struct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst,
|
||||
struct qreg src0, struct qreg src1);
|
||||
struct qinst *vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src0);
|
||||
void vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst);
|
||||
struct qreg vir_uniform(struct v3d_compile *c,
|
||||
enum quniform_contents contents,
|
||||
uint32_t data);
|
||||
void vir_schedule_instructions(struct v3d_compile *c);
|
||||
struct v3d_qpu_instr v3d_qpu_nop(void);
|
||||
|
||||
struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst);
|
||||
struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
|
||||
void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
|
||||
void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf);
|
||||
void vir_set_unpack(struct qinst *inst, int src,
|
||||
enum v3d_qpu_input_unpack unpack);
|
||||
|
||||
struct qreg vir_get_temp(struct v3d_compile *c);
|
||||
void vir_calculate_live_intervals(struct v3d_compile *c);
|
||||
bool vir_has_implicit_uniform(struct qinst *inst);
|
||||
int vir_get_implicit_uniform_src(struct qinst *inst);
|
||||
int vir_get_non_sideband_nsrc(struct qinst *inst);
|
||||
int vir_get_nsrc(struct qinst *inst);
|
||||
bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst);
|
||||
bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op);
|
||||
bool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op);
|
||||
bool vir_is_raw_mov(struct qinst *inst);
|
||||
bool vir_is_tex(struct qinst *inst);
|
||||
bool vir_is_add(struct qinst *inst);
|
||||
bool vir_is_mul(struct qinst *inst);
|
||||
bool vir_is_float_input(struct qinst *inst);
|
||||
bool vir_depends_on_flags(struct qinst *inst);
|
||||
bool vir_writes_r3(struct qinst *inst);
|
||||
bool vir_writes_r4(struct qinst *inst);
|
||||
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
|
||||
uint8_t vir_channels_written(struct qinst *inst);
|
||||
|
||||
void vir_dump(struct v3d_compile *c);
|
||||
void vir_dump_inst(struct v3d_compile *c, struct qinst *inst);
|
||||
|
||||
void vir_validate(struct v3d_compile *c);
|
||||
|
||||
void vir_optimize(struct v3d_compile *c);
|
||||
bool vir_opt_algebraic(struct v3d_compile *c);
|
||||
bool vir_opt_constant_folding(struct v3d_compile *c);
|
||||
bool vir_opt_copy_propagate(struct v3d_compile *c);
|
||||
bool vir_opt_dead_code(struct v3d_compile *c);
|
||||
bool vir_opt_peephole_sf(struct v3d_compile *c);
|
||||
bool vir_opt_small_immediates(struct v3d_compile *c);
|
||||
bool vir_opt_vpm(struct v3d_compile *c);
|
||||
void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
|
||||
void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
|
||||
void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
|
||||
void vir_lower_uniforms(struct v3d_compile *c);
|
||||
|
||||
void v3d_vir_to_qpu(struct v3d_compile *c);
|
||||
uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
|
||||
void qpu_validate(struct v3d_compile *c);
|
||||
struct qpu_reg *v3d_register_allocate(struct v3d_compile *c);
|
||||
bool vir_init_reg_sets(struct v3d_compiler *compiler);
|
||||
|
||||
void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf);
|
||||
|
||||
/* Returns whether a uniform slot holds a texture P0 config word (the
 * QUNIFORM_TEXTURE_CONFIG_P0_* values are laid out contiguously, one per
 * sampler).
 */
static inline bool
quniform_contents_is_texture_p0(enum quniform_contents contents)
{
        return (contents >= QUNIFORM_TEXTURE_CONFIG_P0_0 &&
                contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 +
                            V3D_MAX_TEXTURE_SAMPLERS));
}

/* Emits a load of an immediate 32-bit integer through the uniform stream. */
static inline struct qreg
vir_uniform_ui(struct v3d_compile *c, uint32_t ui)
{
        return vir_uniform(c, QUNIFORM_CONSTANT, ui);
}

/* Emits a load of an immediate float (stored as its bit pattern) through the
 * uniform stream.
 */
static inline struct qreg
vir_uniform_f(struct v3d_compile *c, float f)
{
        return vir_uniform(c, QUNIFORM_CONSTANT, fui(f));
}
|
||||
|
||||
/* Generators for the vir_FOO()/vir_FOO_dest() emit helpers.
 *
 * VIR_ALUn defines, for an n-source opcode, a form that allocates a new
 * temporary destination (via vir_emit_def()) and a _dest form that writes a
 * caller-provided register (via vir_emit_nondef()).  VIR_NODST_n defines
 * only a no-destination form.  The vir_inst argument selects whether the op
 * goes in the add or mul half of the QPU instruction.
 */
#define VIR_ALU0(name, vir_inst, op)                                     \
static inline struct qreg                                                \
vir_##name(struct v3d_compile *c)                                        \
{                                                                        \
        return vir_emit_def(c, vir_inst(op, c->undef,                    \
                                        c->undef, c->undef));            \
}                                                                        \
static inline struct qinst *                                             \
vir_##name##_dest(struct v3d_compile *c, struct qreg dest)               \
{                                                                        \
        return vir_emit_nondef(c, vir_inst(op, dest,                     \
                                           c->undef, c->undef));         \
}

#define VIR_ALU1(name, vir_inst, op)                                     \
static inline struct qreg                                                \
vir_##name(struct v3d_compile *c, struct qreg a)                         \
{                                                                        \
        return vir_emit_def(c, vir_inst(op, c->undef,                    \
                                        a, c->undef));                   \
}                                                                        \
static inline struct qinst *                                             \
vir_##name##_dest(struct v3d_compile *c, struct qreg dest,               \
                  struct qreg a)                                         \
{                                                                        \
        return vir_emit_nondef(c, vir_inst(op, dest, a,                  \
                                           c->undef));                   \
}

#define VIR_ALU2(name, vir_inst, op)                                     \
static inline struct qreg                                                \
vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        return vir_emit_def(c, vir_inst(op, c->undef, a, b));            \
}                                                                        \
static inline struct qinst *                                             \
vir_##name##_dest(struct v3d_compile *c, struct qreg dest,               \
                  struct qreg a, struct qreg b)                          \
{                                                                        \
        return vir_emit_nondef(c, vir_inst(op, dest, a, b));             \
}

#define VIR_NODST_1(name, vir_inst, op)                                  \
static inline struct qinst *                                             \
vir_##name(struct v3d_compile *c, struct qreg a)                         \
{                                                                        \
        return vir_emit_nondef(c, vir_inst(op, c->undef,                 \
                                           a, c->undef));                \
}

#define VIR_NODST_2(name, vir_inst, op)                                  \
static inline struct qinst *                                             \
vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        return vir_emit_nondef(c, vir_inst(op, c->undef,                 \
                                           a, b));                       \
}

/* Shorthands: VIR_A_* place the op in the add half, VIR_M_* in the mul
 * half, pasting the V3D_QPU_{A,M}_ opcode enum name.
 */
#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name)
|
||||
|
||||
/* Float add-half ops. */
VIR_A_ALU2(FADD)
VIR_A_ALU2(VFPACK)
VIR_A_ALU2(FSUB)
VIR_A_ALU2(FMIN)
VIR_A_ALU2(FMAX)

/* Integer add-half ops. */
VIR_A_ALU2(ADD)
VIR_A_ALU2(SUB)
VIR_A_ALU2(SHL)
VIR_A_ALU2(SHR)
VIR_A_ALU2(ASR)
VIR_A_ALU2(ROR)
VIR_A_ALU2(MIN)
VIR_A_ALU2(MAX)
VIR_A_ALU2(UMIN)
VIR_A_ALU2(UMAX)
VIR_A_ALU2(AND)
VIR_A_ALU2(OR)
VIR_A_ALU2(XOR)
VIR_A_ALU2(VADD)
VIR_A_ALU2(VSUB)
VIR_A_ALU1(NOT)
VIR_A_ALU1(NEG)
VIR_A_ALU1(FLAPUSH)
VIR_A_ALU1(FLBPUSH)
VIR_A_ALU1(FLBPOP)
VIR_A_ALU1(SETMSF)
VIR_A_ALU1(SETREVF)
VIR_A_ALU1(TIDX)
VIR_A_ALU1(EIDX)

/* Varying/coordinate and multisample state reads (no sources). */
VIR_A_ALU0(FXCD)
VIR_A_ALU0(XCD)
VIR_A_ALU0(FYCD)
VIR_A_ALU0(YCD)
VIR_A_ALU0(MSF)
VIR_A_ALU0(REVF)
VIR_A_NODST_1(VPMSETUP)
VIR_A_ALU2(FCMP)
VIR_A_ALU2(VFMAX)

/* Float-to-integer conversions with the various rounding modes. */
VIR_A_ALU1(FROUND)
VIR_A_ALU1(FTOIN)
VIR_A_ALU1(FTRUNC)
VIR_A_ALU1(FTOIZ)
VIR_A_ALU1(FFLOOR)
VIR_A_ALU1(FTOUZ)
VIR_A_ALU1(FCEIL)
VIR_A_ALU1(FTOC)

/* Screen-space derivatives. */
VIR_A_ALU1(FDX)
VIR_A_ALU1(FDY)

VIR_A_ALU1(ITOF)
VIR_A_ALU1(CLZ)
VIR_A_ALU1(UTOF)

/* Mul-half ops. */
VIR_M_ALU2(UMUL24)
VIR_M_ALU2(FMUL)
VIR_M_ALU2(SMUL24)
VIR_M_NODST_2(MULTOP)

VIR_M_ALU1(MOV)
VIR_M_ALU1(FMOV)
|
||||
|
||||
/* Emits a MOV predicated on the given condition code. */
static inline struct qinst *
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
             struct qreg dest, struct qreg src)
{
        struct qinst *mov = vir_MOV_dest(c, dest, src);
        vir_set_cond(mov, cond);
        return mov;
}

/* Select: returns src0 when the condition holds, src1 otherwise, by
 * unconditionally moving src1 into a temp and then conditionally
 * overwriting it with src0.
 */
static inline struct qreg
vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond,
        struct qreg src0, struct qreg src1)
{
        struct qreg t = vir_get_temp(c);
        vir_MOV_dest(c, t, src1);
        vir_MOV_cond(c, cond, t, src0);
        return t;
}

/* Writes one value to the VPM through its magic write address. */
static inline void
vir_VPM_WRITE(struct v3d_compile *c, struct qreg val)
{
        vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
}

/* Emits a NOP (add-half NOP with undef operands and no destination). */
static inline struct qinst *
vir_NOP(struct v3d_compile *c)
{
        return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_NOP,
                                               c->undef, c->undef, c->undef));
}
|
||||
/*
|
||||
static inline struct qreg
|
||||
vir_LOAD_IMM(struct v3d_compile *c, uint32_t val)
|
||||
{
|
||||
return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef,
|
||||
vir_reg(QFILE_LOAD_IMM, val), c->undef));
|
||||
}
|
||||
|
||||
static inline struct qreg
|
||||
vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val)
|
||||
{
|
||||
return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_U2, c->undef,
|
||||
vir_reg(QFILE_LOAD_IMM, val),
|
||||
c->undef));
|
||||
}
|
||||
static inline struct qreg
|
||||
vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val)
|
||||
{
|
||||
return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef,
|
||||
vir_reg(QFILE_LOAD_IMM, val),
|
||||
c->undef));
|
||||
}
|
||||
*/
|
||||
|
||||
/* Emits a conditional branch.  The branch target is carried as a uniform
 * (here a placeholder 0) whose real value is filled in at scheduling time.
 */
static inline struct qinst *
vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_cond cond)
{
        /* The actual uniform_data value will be set at scheduling time */
        return vir_emit_nondef(c, vir_branch_inst(cond, vir_uniform_ui(c, 0)));
}
|
||||
|
||||
/* Iteration helpers over the CFG and instruction lists. */

#define vir_for_each_block(block, c)                                    \
        list_for_each_entry(struct qblock, block, &c->blocks, link)

#define vir_for_each_block_rev(block, c)                                \
        list_for_each_entry_rev(struct qblock, block, &c->blocks, link)

/* Loop over the non-NULL members of the successors array. */
#define vir_for_each_successor(succ, block)                             \
        for (struct qblock *succ = block->successors[0];                \
             succ != NULL;                                              \
             succ = (succ == block->successors[1] ? NULL :              \
                     block->successors[1]))

#define vir_for_each_inst(inst, block)                                  \
        list_for_each_entry(struct qinst, inst, &block->instructions, link)

#define vir_for_each_inst_rev(inst, block)                              \
        list_for_each_entry_rev(struct qinst, inst, &block->instructions, link)

/* _safe variant: allows removing the current instruction while iterating. */
#define vir_for_each_inst_safe(inst, block)                             \
        list_for_each_entry_safe(struct qinst, inst, &block->instructions, link)

/* Walks every instruction of every block in block order. */
#define vir_for_each_inst_inorder(inst, c)                              \
        vir_for_each_block(_block, c)                                   \
                vir_for_each_inst(inst, _block)
|
||||
|
||||
#endif /* V3D_COMPILER_H */
|
|
@ -0,0 +1,176 @@
|
|||
/*
|
||||
* Copyright © 2015 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler/v3d_compiler.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/**
|
||||
* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
|
||||
* intrinsics into something amenable to the V3D architecture.
|
||||
*
|
||||
* Currently, it splits VS inputs and uniforms into scalars, drops any
|
||||
* non-position outputs in coordinate shaders, and fixes up the addressing on
|
||||
* indirect uniform loads. FS input and VS output scalarization is handled by
|
||||
* nir_lower_io_to_scalar().
|
||||
*/
|
||||
|
||||
/* Replaces a vector intrinsic's uses with a vec built from per-component
 * SSA defs (comps must hold intr->num_components entries), then removes the
 * original intrinsic.
 */
static void
replace_intrinsic_with_vec(nir_builder *b, nir_intrinsic_instr *intr,
                           nir_ssa_def **comps)
{

        /* Batch things back together into a vector.  This will get split by
         * the later ALU scalarization pass.
         */
        nir_ssa_def *vec = nir_vec(b, comps, intr->num_components);

        /* Replace the old intrinsic with a reference to our reconstructed
         * vector.
         */
        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
        nir_instr_remove(&intr->instr);
}
|
||||
|
||||
/* Lowers a store_output.  For vertex shaders (vs_key set), drops stores to
 * varyings that the bound fragment shader never reads; position and point
 * size are always kept.  Other stages are left untouched.
 */
static void
v3d_nir_lower_output(struct v3d_compile *c, nir_builder *b,
                     nir_intrinsic_instr *intr)
{
        /* Find the output variable matching this intrinsic's driver
         * location.
         */
        nir_variable *output_var = NULL;
        nir_foreach_variable(var, &c->s->outputs) {
                if (var->data.driver_location == nir_intrinsic_base(intr)) {
                        output_var = var;
                        break;
                }
        }
        assert(output_var);

        if (c->vs_key) {
                int slot = output_var->data.location;
                bool used = false;

                switch (slot) {
                case VARYING_SLOT_PSIZ:
                case VARYING_SLOT_POS:
                        used = true;
                        break;

                default:
                        /* Keep the output only if some FS input consumes
                         * this varying slot.
                         */
                        for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
                                if (v3d_slot_get_slot(c->vs_key->fs_inputs[i]) == slot) {
                                        used = true;
                                        break;
                                }
                        }
                        break;
                }

                if (!used)
                        nir_instr_remove(&intr->instr);
        }
}
|
||||
|
||||
/* Splits a vector load_uniform into scalar loads, converting the offsets to
 * bytes: the base appears to be in 16-byte (vec4) slots (*16, components at
 * *4) and the indirect offset is shifted left by 4 to match — TODO confirm
 * units against the uniform upload code.
 */
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* Generate scalar loads equivalent to the original vector. */
        nir_ssa_def *dests[4];
        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s, intr->intrinsic);
                intr_comp->num_components = 1;
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);

                /* Convert the uniform offset to bytes.  If it happens
                 * to be a constant, constant-folding will clean up
                 * the shift for us.
                 */
                nir_intrinsic_set_base(intr_comp,
                                       nir_intrinsic_base(intr) * 16 +
                                       i * 4);

                intr_comp->src[0] =
                        nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
                                                 nir_imm_int(b, 4)));

                dests[i] = &intr_comp->dest.ssa;

                nir_builder_instr_insert(b, &intr_comp->instr);
        }

        replace_intrinsic_with_vec(b, intr, dests);
}
|
||||
|
||||
/* Dispatches one instruction to the appropriate I/O lowering; non-intrinsic
 * instructions and unhandled intrinsics are left alone.
 */
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
                       struct nir_instr *instr)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                /* Nothing to do (yet) for inputs. */
                break;

        case nir_intrinsic_store_output:
                v3d_nir_lower_output(c, b, intr);
                break;

        case nir_intrinsic_load_uniform:
                v3d_nir_lower_uniform(c, b, intr);
                break;

        case nir_intrinsic_load_user_clip_plane:
        default:
                break;
        }
}
|
||||
|
||||
/* Runs the I/O lowering across one function implementation.  Uses the
 * _safe iterator since lowering removes/replaces instructions.  Always
 * reports progress (returns true).
 */
static bool
v3d_nir_lower_io_impl(struct v3d_compile *c, nir_function_impl *impl)
{
        nir_builder b;
        nir_builder_init(&b, impl);

        nir_foreach_block(block, impl) {
                nir_foreach_instr_safe(instr, block)
                        v3d_nir_lower_io_instr(c, &b, instr);
        }

        /* Only instructions were touched, so block layout and dominance
         * are still valid.
         */
        nir_metadata_preserve(impl, nir_metadata_block_index |
                              nir_metadata_dominance);

        return true;
}
|
||||
|
||||
void
|
||||
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
|
||||
{
|
||||
nir_foreach_function(function, s) {
|
||||
if (function->impl)
|
||||
v3d_nir_lower_io_impl(c, function->impl);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,907 @@
|
|||
/*
|
||||
* Copyright © 2016-2017 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
/* Returns the number of explicit ALU sources of an instruction, not
 * counting the sideband (implicit uniform) slot.  Branches have none.
 * When the add half is a NOP, the mul half's operand count is used.
 */
int
vir_get_non_sideband_nsrc(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return 0;
        case V3D_QPU_INSTR_TYPE_ALU:
                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
                else
                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
        }

        return 0;
}

/* Returns the total source count, including the trailing implicit-uniform
 * slot when present.
 */
int
vir_get_nsrc(struct qinst *inst)
{
        int nsrc = vir_get_non_sideband_nsrc(inst);

        if (vir_has_implicit_uniform(inst))
                nsrc++;

        return nsrc;
}
|
||||
|
||||
/* Returns whether the instruction consumes an implicit uniform from the
 * uniform stream: branches always do (their target), TLBU writes do, and
 * otherwise the per-instruction flag decides.
 */
bool
vir_has_implicit_uniform(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->dst.file) {
                case QFILE_TLBU:
                        return true;
                default:
                        return inst->has_implicit_uniform;
                }
        }
        return false;
}

/* The sideband uniform for textures gets stored after the normal ALU
 * arguments.
 */
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
        /* Last source slot; only meaningful when
         * vir_has_implicit_uniform() is true.
         */
        return vir_get_nsrc(inst) - 1;
}
|
||||
|
||||
/**
|
||||
* Returns whether the instruction has any side effects that must be
|
||||
* preserved.
|
||||
*/
|
||||
/**
 * Returns whether the instruction has any side effects that must be
 * preserved.
 *
 * Covers branches, the flag/mask/VPM-setup add ops, MULTOP (which feeds
 * hidden multiply state), and any instruction with the ldtmu signal.
 */
bool
vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_SETREVF:
                case V3D_QPU_A_SETMSF:
                case V3D_QPU_A_VPMSETUP:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_MULTOP:
                        return true;
                default:
                        break;
                }
        }

        /* ldtmu pops the TMU result FIFO, so it can't be eliminated or
         * duplicated.
         */
        if (inst->qpu.sig.ldtmu)
                return true;

        return false;
}
|
||||
|
||||
/* Returns whether the instruction interprets its sources as floats
 * (used e.g. to decide which unpack modes are legal).
 */
bool
vir_is_float_input(struct qinst *inst)
{
        /* XXX: More instrs */
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return false;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_FADD:
                case V3D_QPU_A_FSUB:
                case V3D_QPU_A_FMIN:
                case V3D_QPU_A_FMAX:
                case V3D_QPU_A_FTOIN:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_FMOV:
                case V3D_QPU_M_VFMUL:
                case V3D_QPU_M_FMUL:
                        return true;
                default:
                        break;
                }
        }

        return false;
}
|
||||
|
||||
/* Returns whether the instruction is a plain (F)MOV with no pack and no
 * condition — i.e. a candidate for copy propagation.
 */
bool
vir_is_raw_mov(struct qinst *inst)
{
        /* Must be an ALU MOV/FMOV in the mul half. */
        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
                return false;
        }

        /* Any output pack would modify the value being moved. */
        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
                return false;
        }

        /* A conditional move only writes some channels. */
        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
                return false;

        return true;
}
|
||||
|
||||
/* Returns whether the instruction uses the add half of the ALU. */
bool
vir_is_add(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
}

/* Returns whether the instruction uses the mul half of the ALU. */
bool
vir_is_mul(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
}

/* Returns whether the instruction writes a TMU magic register (i.e. is
 * part of a texture operation).
 */
bool
vir_is_tex(struct qinst *inst)
{
        if (inst->dst.file == QFILE_MAGIC)
                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);

        return false;
}
|
||||
|
||||
bool
|
||||
vir_depends_on_flags(struct qinst *inst)
|
||||
{
|
||||
if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
|
||||
return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS);
|
||||
} else {
|
||||
return (inst->qpu.flags.ac != V3D_QPU_COND_NONE &&
|
||||
inst->qpu.flags.mc != V3D_QPU_COND_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns whether the instruction implicitly clobbers accumulator r3:
 * reading a varying or the VPM does — NOTE(review): inferred from the
 * source files checked, matches vc4's equivalent; confirm against the HW
 * spec.
 */
bool
vir_writes_r3(struct qinst *inst)
{
        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                switch (inst->src[i].file) {
                case QFILE_VARY:
                case QFILE_VPM:
                        return true;
                default:
                        break;
                }
        }

        return false;
}
|
||||
|
||||
/* Returns whether the instruction implicitly clobbers accumulator r4:
 * SFU operations (writes to the RECIP/RSQRT/EXP/LOG/SIN magic addresses)
 * and TMU result loads (ldtmu) land in r4.
 */
bool
vir_writes_r4(struct qinst *inst)
{
        switch (inst->dst.file) {
        case QFILE_MAGIC:
                switch (inst->dst.index) {
                case V3D_QPU_WADDR_RECIP:
                case V3D_QPU_WADDR_RSQRT:
                case V3D_QPU_WADDR_EXP:
                case V3D_QPU_WADDR_LOG:
                case V3D_QPU_WADDR_SIN:
                        return true;
                }
                break;
        default:
                break;
        }

        if (inst->qpu.sig.ldtmu)
                return true;

        return false;
}
|
||||
|
||||
/* Sets the input unpack mode for source 0 or 1 on whichever ALU half the
 * instruction occupies.
 */
void
vir_set_unpack(struct qinst *inst, int src,
               enum v3d_qpu_input_unpack unpack)
{
        assert(src == 0 || src == 1);

        if (vir_is_add(inst)) {
                if (src == 0)
                        inst->qpu.alu.add.a_unpack = unpack;
                else
                        inst->qpu.alu.add.b_unpack = unpack;
        } else {
                assert(vir_is_mul(inst));
                if (src == 0)
                        inst->qpu.alu.mul.a_unpack = unpack;
                else
                        inst->qpu.alu.mul.b_unpack = unpack;
        }
}

/* Sets the execution condition on the half of the ALU this instruction
 * uses.
 */
void
vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.ac = cond;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mc = cond;
        }
}

/* Sets the pack-flags (PF) field on the half of the ALU this instruction
 * uses, making it update the condition flags.
 */
void
vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.apf = pf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mpf = pf;
        }
}
|
||||
|
||||
#if 0
|
||||
uint8_t
|
||||
vir_channels_written(struct qinst *inst)
|
||||
{
|
||||
if (vir_is_mul(inst)) {
|
||||
switch (inst->dst.pack) {
|
||||
case QPU_PACK_MUL_NOP:
|
||||
case QPU_PACK_MUL_8888:
|
||||
return 0xf;
|
||||
case QPU_PACK_MUL_8A:
|
||||
return 0x1;
|
||||
case QPU_PACK_MUL_8B:
|
||||
return 0x2;
|
||||
case QPU_PACK_MUL_8C:
|
||||
return 0x4;
|
||||
case QPU_PACK_MUL_8D:
|
||||
return 0x8;
|
||||
}
|
||||
} else {
|
||||
switch (inst->dst.pack) {
|
||||
case QPU_PACK_A_NOP:
|
||||
case QPU_PACK_A_8888:
|
||||
case QPU_PACK_A_8888_SAT:
|
||||
case QPU_PACK_A_32_SAT:
|
||||
return 0xf;
|
||||
case QPU_PACK_A_8A:
|
||||
case QPU_PACK_A_8A_SAT:
|
||||
return 0x1;
|
||||
case QPU_PACK_A_8B:
|
||||
case QPU_PACK_A_8B_SAT:
|
||||
return 0x2;
|
||||
case QPU_PACK_A_8C:
|
||||
case QPU_PACK_A_8C_SAT:
|
||||
return 0x4;
|
||||
case QPU_PACK_A_8D:
|
||||
case QPU_PACK_A_8D_SAT:
|
||||
return 0x8;
|
||||
case QPU_PACK_A_16A:
|
||||
case QPU_PACK_A_16A_SAT:
|
||||
return 0x3;
|
||||
case QPU_PACK_A_16B:
|
||||
case QPU_PACK_A_16B_SAT:
|
||||
return 0xc;
|
||||
}
|
||||
}
|
||||
unreachable("Bad pack field");
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Allocates a fresh temporary register, growing (and zero-filling) the
 * defs array as needed so c->defs always has a slot per temp.
 */
struct qreg
vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        if (c->num_temps > c->defs_array_size) {
                /* Double the array (minimum 16 entries) and zero the new
                 * tail so un-defed temps read as NULL.
                 */
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);
                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));
        }

        return reg;
}
|
||||
|
||||
/* Allocates a new qinst with its op in the add half of the QPU
 * instruction.  The uniform slot starts out unused (~0).  NOTE(review):
 * the calloc result is not NULL-checked, matching the rest of this file.
 */
struct qinst *
vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.add.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

/* Same as vir_add_inst(), but places the op in the mul half. */
struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.mul.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}
|
||||
|
||||
/* Allocates a conditional branch instruction.  Both destinations are
 * relative; src carries the (uniform) branch target, filled in later by
 * the scheduler.
 */
struct qinst *
vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
        inst->qpu.branch.cond = cond;
        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

        inst->dst = vir_reg(QFILE_NULL, 0);
        inst->src[0] = src;
        inst->uniform = ~0;

        return inst;
}
|
||||
|
||||
/* Appends an instruction to the current block, tracking VPM write counts
 * as a side statistic.
 */
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        list_addtail(&inst->link, &c->cur_block->instructions);

        if (inst->dst.file == QFILE_MAGIC &&
            inst->dst.index == V3D_QPU_WADDR_VPM)
                c->num_vpm_writes++;
}

/* Updates inst to write to a new temporary, emits it, and notes the def. */
struct qreg
vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
        assert(inst->dst.file == QFILE_NULL);

        inst->dst = vir_get_temp(c);

        /* vir_get_temp() always returns QFILE_TEMP, so this records inst
         * as the temp's single def.
         */
        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = inst;

        vir_emit(c, inst);

        return inst->dst;
}

/* Emits an instruction with a caller-chosen destination.  Writing a temp
 * this way makes it multiply-defined, so its defs entry is cleared.
 */
struct qinst *
vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
{
        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = NULL;

        vir_emit(c, inst);

        return inst;
}
|
||||
|
||||
/* Allocates an empty CFG block with a fresh index and an (initially empty)
 * predecessor set.
 */
struct qblock *
vir_new_block(struct v3d_compile *c)
{
        struct qblock *block = rzalloc(c, struct qblock);

        list_inithead(&block->instructions);

        block->predecessors = _mesa_set_create(block,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

        block->index = c->next_block_index++;

        return block;
}

/* Makes block the current emission target and appends it to the shader's
 * block list.
 */
void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        list_addtail(&block->link, &c->blocks);
}

/* Returns the first block of the shader. */
struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}

/* Returns the last block of the shader. */
struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}
|
||||
|
||||
void
|
||||
vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
|
||||
{
|
||||
_mesa_set_add(successor->predecessors, predecessor);
|
||||
if (predecessor->successors[0]) {
|
||||
assert(!predecessor->successors[1]);
|
||||
predecessor->successors[1] = successor;
|
||||
} else {
|
||||
predecessor->successors[0] = successor;
|
||||
}
|
||||
}
|
||||
|
||||
/* Creates a compiler context for the given device, building the register
 * allocator's register sets up front.  Returns NULL on allocation or
 * register-set setup failure.  Free with v3d_compiler_free().
 */
const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info *devinfo)
{
        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
        if (!compiler)
                return NULL;

        compiler->devinfo = devinfo;

        if (!vir_init_reg_sets(compiler)) {
                ralloc_free(compiler);
                return NULL;
        }

        return compiler;
}

/* Destroys a compiler context created by v3d_compiler_init(). */
void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        /* Cast away const: the object is owned by this module. */
        ralloc_free((void *)compiler);
}
|
||||
|
||||
/* Creates a per-shader compile context.  The NIR shader is cloned into the
 * context's ralloc hierarchy, so the caller's copy is untouched, and an
 * initial empty block is set up for emission.  NOTE(review): the rzalloc
 * result is not NULL-checked before use.
 */
static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 int program_id, int variant_id)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;

        /* Own a private copy of the shader so our lowering passes don't
         * mutate the caller's NIR.
         */
        s = nir_shader_clone(c, s);
        c->s = s;

        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        /* -1 == "not present" for these special outputs. */
        c->output_position_index = -1;
        c->output_point_size_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}
|
||||
|
||||
/* Early NIR lowering shared by all stages: projects away txp, and applies
 * per-sampler swizzles from the shader key via nir_lower_tex so later
 * passes can optimize the swizzled result.
 */
static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_rect = false, /* XXX */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
}

/* Late NIR lowering: V3D-specific I/O addressing, then integer division
 * (which V3D has no instruction for).
 */
static void
v3d_lower_nir_late(struct v3d_compile *c)
{
        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, nir_lower_idiv);
}
|
||||
|
||||
/* Copies the compile-time uniform stream (values + contents tags) into the
 * prog_data, ralloc'd off prog_data so it shares its lifetime.
 */
static void
v3d_set_prog_data_uniforms(struct v3d_compile *c,
                           struct v3d_prog_data *prog_data)
{
        int count = c->num_uniforms;
        struct v3d_uniform_list *ulist = &prog_data->uniforms;

        ulist->count = count;
        ulist->data = ralloc_array(prog_data, uint32_t, count);
        memcpy(ulist->data, c->uniform_data,
               count * sizeof(*ulist->data));
        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
        memcpy(ulist->contents, c->uniform_contents,
               count * sizeof(*ulist->contents));
}
|
||||
|
||||
/* Copy the compiler UBO range state to the compiled shader, dropping out
|
||||
* arrays that were never referenced by an indirect load.
|
||||
*
|
||||
* (Note that QIR dead code elimination of an array access still leaves that
|
||||
* array alive, though)
|
||||
*/
|
||||
/* Copy the compiler UBO range state to the compiled shader, dropping out
 * arrays that were never referenced by an indirect load.
 *
 * (Note that QIR dead code elimination of an array access still leaves that
 * array alive, though)
 */
static void
v3d_set_prog_data_ubo(struct v3d_compile *c,
                      struct v3d_prog_data *prog_data)
{
        if (!c->num_ubo_ranges)
                return;

        /* Allocate for the worst case (all ranges used); only used ranges
         * are appended below.
         */
        prog_data->num_ubo_ranges = 0;
        prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
                                             c->num_ubo_ranges);
        for (int i = 0; i < c->num_ubo_ranges; i++) {
                if (!c->ubo_range_used[i])
                        continue;

                struct v3d_ubo_range *range = &c->ubo_ranges[i];
                prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
                prog_data->ubo_size += range->size;
        }

        /* shader-db statistic: report UBO uniform usage in dwords. */
        if (prog_data->ubo_size) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
                        fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
                                vir_get_stage_name(c),
                                c->program_id, c->variant_id,
                                prog_data->ubo_size / 4);
                }
        }
}
|
||||
|
||||
/* Fills the stage-independent parts of prog_data from the finished
 * compile: uniform stream and UBO ranges.
 */
static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        v3d_set_prog_data_uniforms(c, prog_data);
        v3d_set_prog_data_ubo(c, prog_data);
}
|
||||
|
||||
static uint64_t *
|
||||
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
|
||||
{
|
||||
*final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
|
||||
|
||||
uint64_t *qpu_insts = malloc(*final_assembly_size);
|
||||
if (!qpu_insts)
|
||||
return NULL;
|
||||
|
||||
memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
|
||||
|
||||
vir_compile_destroy(c);
|
||||
|
||||
return qpu_insts;
|
||||
}
|
||||
|
||||
/* Compiles a vertex shader's NIR into QPU instructions.
 *
 * Runs the VS-specific NIR lowering (clamp color, user clip planes, output
 * scalarization), converts to VIR and then to QPU code.  The returned
 * buffer is malloc'd and owned by the caller; its size is written to
 * *final_assembly_size.
 */
uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
                         struct v3d_vs_key *key,
                         struct v3d_vs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->vs_key = key;

        v3d_lower_nir(c);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (key->base.ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
                /* NOTE(review): outputs are scalarized again unconditionally
                 * below; this pass here looks redundant — confirm whether
                 * nir_lower_clip_vs requires scalar outputs immediately.
                 */
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);

        prog_data->base.num_inputs = c->num_inputs;

        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        /* Input/output segment size are in 8x32-bit multiples. */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        prog_data->uses_vid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        return v3d_return_qpu_insts(c, final_assembly_size);
}
|
||||
|
||||
/* Records the FS varying inputs in prog_data: the slot list, plus bitsets
 * marking which inputs are colors (for two-sided lighting) and which are
 * flat-shaded.
 */
static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->base.num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        for (int i = 0; i < c->num_inputs; i++) {
                struct v3d_varying_slot v3d_slot = c->input_slots[i];
                uint8_t slot = v3d_slot_get_slot(v3d_slot);

                /* Front and back colors (including back-face color) need
                 * special handling at draw time, so mark them.
                 */
                if (slot == VARYING_SLOT_COL0 ||
                    slot == VARYING_SLOT_COL1 ||
                    slot == VARYING_SLOT_BFC0 ||
                    slot == VARYING_SLOT_BFC1) {
                        BITSET_SET(prog_data->color_inputs, i);
                }

                if (BITSET_TEST(c->flat_shade_flags, i))
                        BITSET_SET(prog_data->flat_shade_flags, i);
        }
}
|
||||
|
||||
/* Compiles a fragment shader's NIR into QPU instructions.
 *
 * Runs the FS-specific NIR lowering (two-sided color, clamp color, alpha
 * test, user clip planes, input scalarization), converts to VIR and then to
 * QPU code.  The returned buffer is malloc'd and owned by the caller; its
 * size is written to *final_assembly_size.
 */
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
                         struct v3d_fs_key *key,
                         struct v3d_fs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->fs_key = key;

        v3d_lower_nir(c);

        if (key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
                           false);
        }

        if (key->base.ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);
        v3d_set_fs_prog_data_inputs(c, prog_data);
        if (c->s->info.outputs_written & (1 << FRAG_RESULT_DEPTH))
                prog_data->writes_z = true;

        return v3d_return_qpu_insts(c, final_assembly_size);
}
|
||||
|
||||
/* Unlinks an instruction from its block and frees it, clearing its SSA-style
 * def-tracking entry so later passes don't follow a dangling pointer.
 */
void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        list_del(&qinst->link);
        free(qinst);
}
|
||||
|
||||
/* Chases a register through chains of MOVs back to the original source.
 *
 * Currently a pass-through: the VC4 implementation is kept commented out
 * until the equivalent VIR opcode/pack checks are ported.
 */
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}
|
||||
|
||||
/* Tears down a compile context: frees every instruction in every block
 * (instructions are individually malloc'd, so ralloc_free alone would leak
 * them), then releases the ralloc context holding everything else.
 */
void
vir_compile_destroy(struct v3d_compile *c)
{
        vir_for_each_block(block, c) {
                while (!list_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}
|
||||
|
||||
struct qreg
|
||||
vir_uniform(struct v3d_compile *c,
|
||||
enum quniform_contents contents,
|
||||
uint32_t data)
|
||||
{
|
||||
for (int i = 0; i < c->num_uniforms; i++) {
|
||||
if (c->uniform_contents[i] == contents &&
|
||||
c->uniform_data[i] == data) {
|
||||
return vir_reg(QFILE_UNIF, i);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t uniform = c->num_uniforms++;
|
||||
|
||||
if (uniform >= c->uniform_array_size) {
|
||||
c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
|
||||
c->uniform_array_size * 2);
|
||||
|
||||
c->uniform_data = reralloc(c, c->uniform_data,
|
||||
uint32_t,
|
||||
c->uniform_array_size);
|
||||
c->uniform_contents = reralloc(c, c->uniform_contents,
|
||||
enum quniform_contents,
|
||||
c->uniform_array_size);
|
||||
}
|
||||
|
||||
c->uniform_contents[uniform] = contents;
|
||||
c->uniform_data[uniform] = data;
|
||||
|
||||
return vir_reg(QFILE_UNIF, uniform);
|
||||
}
|
||||
|
||||
void
|
||||
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
|
||||
{
|
||||
struct qinst *last_inst = NULL;
|
||||
|
||||
if (!list_empty(&c->cur_block->instructions))
|
||||
last_inst = (struct qinst *)c->cur_block->instructions.prev;
|
||||
|
||||
if (src.file != QFILE_TEMP ||
|
||||
!c->defs[src.index] ||
|
||||
last_inst != c->defs[src.index]) {
|
||||
/* XXX: Make the MOV be the appropriate type */
|
||||
last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
|
||||
last_inst = (struct qinst *)c->cur_block->instructions.prev;
|
||||
}
|
||||
|
||||
vir_set_pf(last_inst, pf);
|
||||
}
|
||||
|
||||
/* Runs one optimization pass (a bool-returning function taking the compile
 * context), ORs its result into the enclosing scope's "progress" flag, and
 * optionally logs which pass made progress.  do/while(0) makes the
 * multi-statement macro behave as a single statement.
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)
|
||||
|
||||
/* Runs the VIR-level optimization loop (copy propagation and dead code
 * elimination) to a fixed point: iterates until a full round makes no
 * progress.
 */
void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_dead_code);

                if (!progress)
                        break;

                pass++;
        }
}
|
||||
|
||||
const char *
|
||||
vir_get_stage_name(struct v3d_compile *c)
|
||||
{
|
||||
if (c->vs_key && c->vs_key->is_coord)
|
||||
return "MESA_SHADER_COORD";
|
||||
else
|
||||
return gl_shader_stage_name(c->s->stage);
|
||||
}
|
|
@ -0,0 +1,339 @@
|
|||
/*
|
||||
* Copyright © 2016-2017 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
/* Prints a human-readable form of one VIR register operand to stderr.
 * Uniform registers additionally get their contents decoded (constants,
 * push uniforms, texture parameters, UBO addresses, ...).
 */
static void
vir_print_reg(struct v3d_compile *c, struct qreg reg)
{
        /* Short prefixes for the register files that print as "<prefix><n>". */
        static const char *files[] = {
                [QFILE_TEMP] = "t",
                [QFILE_VARY] = "v",
                [QFILE_UNIF] = "u",
                [QFILE_TLB] = "tlb",
                [QFILE_TLBU] = "tlbu",
        };
        static const char *quniform_names[] = {
                [QUNIFORM_VIEWPORT_X_SCALE] = "vp_x_scale",
                [QUNIFORM_VIEWPORT_Y_SCALE] = "vp_y_scale",
                [QUNIFORM_VIEWPORT_Z_OFFSET] = "vp_z_offset",
                [QUNIFORM_VIEWPORT_Z_SCALE] = "vp_z_scale",
        };

        switch (reg.file) {

        case QFILE_NULL:
                fprintf(stderr, "null");
                break;

        case QFILE_LOAD_IMM:
                /* Show the bits both as hex and reinterpreted as a float. */
                fprintf(stderr, "0x%08x (%f)", reg.index, uif(reg.index));
                break;

        case QFILE_REG:
                fprintf(stderr, "rf%d", reg.index);
                break;

        case QFILE_MAGIC:
                fprintf(stderr, "%s", v3d_qpu_magic_waddr_name(reg.index));
                break;

        case QFILE_SMALL_IMM:
                /* Small immediates in range are integers; out-of-range
                 * encodings are the float small immediates.
                 */
                if ((int)reg.index >= -16 && (int)reg.index <= 15)
                        fprintf(stderr, "%d", reg.index);
                else
                        fprintf(stderr, "%f", uif(reg.index));
                break;

        case QFILE_VPM:
                fprintf(stderr, "vpm%d.%d",
                        reg.index / 4, reg.index % 4);
                break;

        case QFILE_TLB:
                fprintf(stderr, "%s", files[reg.file]);
                break;

        case QFILE_UNIF: {
                enum quniform_contents contents = c->uniform_contents[reg.index];

                fprintf(stderr, "%s%d", files[reg.file], reg.index);

                /* Decode what the uniform slot will be filled with. */
                switch (contents) {
                case QUNIFORM_CONSTANT:
                        fprintf(stderr, " (0x%08x / %f)",
                                c->uniform_data[reg.index],
                                uif(c->uniform_data[reg.index]));
                        break;

                case QUNIFORM_UNIFORM:
                        fprintf(stderr, " (push[%d])",
                                c->uniform_data[reg.index]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        fprintf(stderr, " (tex[%d].p1)",
                                c->uniform_data[reg.index]);
                        break;

                case QUNIFORM_TEXTURE_WIDTH:
                        fprintf(stderr, " (tex[%d].width)",
                                c->uniform_data[reg.index]);
                        break;
                case QUNIFORM_TEXTURE_HEIGHT:
                        fprintf(stderr, " (tex[%d].height)",
                                c->uniform_data[reg.index]);
                        break;
                case QUNIFORM_TEXTURE_DEPTH:
                        fprintf(stderr, " (tex[%d].depth)",
                                c->uniform_data[reg.index]);
                        break;
                case QUNIFORM_TEXTURE_ARRAY_SIZE:
                        fprintf(stderr, " (tex[%d].array_size)",
                                c->uniform_data[reg.index]);
                        break;
                case QUNIFORM_TEXTURE_LEVELS:
                        fprintf(stderr, " (tex[%d].levels)",
                                c->uniform_data[reg.index]);
                        break;

                case QUNIFORM_UBO_ADDR:
                        fprintf(stderr, " (ubo[%d])",
                                c->uniform_data[reg.index]);
                        break;

                default:
                        if (quniform_contents_is_texture_p0(contents)) {
                                fprintf(stderr, " (tex[%d].p0: 0x%08x)",
                                        contents - QUNIFORM_TEXTURE_CONFIG_P0_0,
                                        c->uniform_data[reg.index]);
                        } else if (contents < ARRAY_SIZE(quniform_names)) {
                                fprintf(stderr, " (%s)",
                                        quniform_names[contents]);
                        } else {
                                fprintf(stderr, " (%d / 0x%08x)", contents,
                                        c->uniform_data[reg.index]);
                        }
                }

                break;
        }

        default:
                fprintf(stderr, "%s%d", files[reg.file], reg.index);
                break;
        }
}
|
||||
|
||||
/* Prints the signal bits (thread switch, varying/VPM/TMU/uniform loads,
 * TMU config write) set on an instruction, as trailing "; name" annotations.
 */
static void
vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
{
        struct v3d_qpu_sig *sig = &inst->qpu.sig;

        if (sig->thrsw)
                fprintf(stderr, "; thrsw");
        if (sig->ldvary)
                fprintf(stderr, "; ldvary");
        if (sig->ldvpm)
                fprintf(stderr, "; ldvpm");
        if (sig->ldtmu)
                fprintf(stderr, "; ldtmu");
        if (sig->ldunif)
                fprintf(stderr, "; ldunif");
        if (sig->wrtmuc)
                fprintf(stderr, "; wrtmuc");
}
|
||||
|
||||
/* Prints an ALU instruction: op name with condition/flag suffixes, the
 * destination (with output pack), then the sources (with input unpacks) and
 * any signal annotations.  A VIR qinst holds only one of the add/mul ops;
 * if the add op is a NOP the instruction is a mul op.
 */
static void
vir_dump_alu(struct v3d_compile *c, struct qinst *inst)
{
        struct v3d_qpu_instr *instr = &inst->qpu;
        /* Sideband sources (e.g. the implicit uniform) are printed but get
         * no unpack suffix.
         */
        int nsrc = vir_get_non_sideband_nsrc(inst);
        int sideband_nsrc = vir_get_nsrc(inst);
        enum v3d_qpu_input_unpack unpack[2];

        if (inst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                fprintf(stderr, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
                fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.ac));
                fprintf(stderr, "%s", v3d_qpu_pf_name(instr->flags.apf));
                fprintf(stderr, "%s", v3d_qpu_uf_name(instr->flags.auf));
                fprintf(stderr, " ");

                vir_print_reg(c, inst->dst);
                fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.add.output_pack));

                unpack[0] = instr->alu.add.a_unpack;
                unpack[1] = instr->alu.add.b_unpack;
        } else {
                fprintf(stderr, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
                fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.mc));
                fprintf(stderr, "%s", v3d_qpu_pf_name(instr->flags.mpf));
                fprintf(stderr, "%s", v3d_qpu_uf_name(instr->flags.muf));
                fprintf(stderr, " ");

                vir_print_reg(c, inst->dst);
                fprintf(stderr, "%s", v3d_qpu_pack_name(instr->alu.mul.output_pack));

                unpack[0] = instr->alu.mul.a_unpack;
                unpack[1] = instr->alu.mul.b_unpack;
        }

        for (int i = 0; i < sideband_nsrc; i++) {
                fprintf(stderr, ", ");
                vir_print_reg(c, inst->src[i]);
                if (i < nsrc)
                        fprintf(stderr, "%s", v3d_qpu_unpack_name(unpack[i]));
        }

        vir_dump_sig(c, inst);
}
|
||||
|
||||
/* Prints one VIR instruction to stderr: ALU instructions go through
 * vir_dump_alu(); branches print their condition, MSF sign mode, primary
 * and (when ub is set) secondary branch destinations, and the implicit
 * uniform operand if present.
 */
void
vir_dump_inst(struct v3d_compile *c, struct qinst *inst)
{
        struct v3d_qpu_instr *instr = &inst->qpu;

        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                vir_dump_alu(c, inst);
                break;
        case V3D_QPU_INSTR_TYPE_BRANCH:
                fprintf(stderr, "b");
                if (instr->branch.ub)
                        fprintf(stderr, "u");

                fprintf(stderr, "%s",
                        v3d_qpu_branch_cond_name(instr->branch.cond));
                fprintf(stderr, "%s", v3d_qpu_msfign_name(instr->branch.msfign));

                /* Primary branch destination (bdi). */
                switch (instr->branch.bdi) {
                case V3D_QPU_BRANCH_DEST_ABS:
                        fprintf(stderr, " zero_addr+0x%08x", instr->branch.offset);
                        break;

                case V3D_QPU_BRANCH_DEST_REL:
                        fprintf(stderr, " %d", instr->branch.offset);
                        break;

                case V3D_QPU_BRANCH_DEST_LINK_REG:
                        fprintf(stderr, " lri");
                        break;

                case V3D_QPU_BRANCH_DEST_REGFILE:
                        fprintf(stderr, " rf%d", instr->branch.raddr_a);
                        break;
                }

                /* Secondary (uniform) branch destination (bdu), only
                 * meaningful when ub is set.
                 */
                if (instr->branch.ub) {
                        switch (instr->branch.bdu) {
                        case V3D_QPU_BRANCH_DEST_ABS:
                                fprintf(stderr, ", a:unif");
                                break;

                        case V3D_QPU_BRANCH_DEST_REL:
                                fprintf(stderr, ", r:unif");
                                break;

                        case V3D_QPU_BRANCH_DEST_LINK_REG:
                                fprintf(stderr, ", lri");
                                break;

                        case V3D_QPU_BRANCH_DEST_REGFILE:
                                fprintf(stderr, ", rf%d", instr->branch.raddr_a);
                                break;
                        }
                }

                if (vir_has_implicit_uniform(inst)) {
                        fprintf(stderr, " ");
                        vir_print_reg(c, inst->src[vir_get_implicit_uniform_src(inst)]);
                }

                break;
        }
}
|
||||
|
||||
/* Dumps the whole VIR program to stderr, block by block.  If live-interval
 * data has been computed (c->temp_start/temp_end non-NULL), each instruction
 * is prefixed with "S<n>"/"E<n>" markers for temps whose live ranges start
 * or end at that instruction.  Block successor edges are printed after each
 * block.
 */
void
vir_dump(struct v3d_compile *c)
{
        int ip = 0;

        vir_for_each_block(block, c) {
                fprintf(stderr, "BLOCK %d:\n", block->index);
                vir_for_each_inst(inst, block) {
                        if (c->temp_start) {
                                bool first = true;

                                for (int i = 0; i < c->num_temps; i++) {
                                        if (c->temp_start[i] != ip)
                                                continue;

                                        if (first) {
                                                first = false;
                                        } else {
                                                fprintf(stderr, ", ");
                                        }
                                        fprintf(stderr, "S%4d", i);
                                }

                                if (first)
                                        fprintf(stderr, " ");
                                else
                                        fprintf(stderr, " ");
                        }

                        if (c->temp_end) {
                                bool first = true;

                                for (int i = 0; i < c->num_temps; i++) {
                                        if (c->temp_end[i] != ip)
                                                continue;

                                        if (first) {
                                                first = false;
                                        } else {
                                                fprintf(stderr, ", ");
                                        }
                                        fprintf(stderr, "E%4d", i);
                                }

                                if (first)
                                        fprintf(stderr, " ");
                                else
                                        fprintf(stderr, " ");
                        }

                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        ip++;
                }
                if (block->successors[1]) {
                        fprintf(stderr, "-> BLOCK %d, %d\n",
                                block->successors[0]->index,
                                block->successors[1]->index);
                } else if (block->successors[0]) {
                        fprintf(stderr, "-> BLOCK %d\n",
                                block->successors[0]->index);
                }
        }
}
|
|
@ -0,0 +1,340 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
* Copyright © 2016 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#define MAX_INSTRUCTION (1 << 30)
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/register_allocate.h"
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
/* Per-temp tracking of conditional/partial writes within a block, used to
 * decide when a sequence of partial updates adds up to a full definition.
 */
struct partial_update_state {
        /* Last conditional writer seen for each of the 4 channels. */
        struct qinst *insts[4];
        /* Bitmask of channels known to be fully written so far. */
        uint8_t channels;
};
|
||||
|
||||
/* Hash-table callback: hashes the int pointed to by key. */
static uint32_t
int_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(int));
}
|
||||
|
||||
/* Hash-table callback: keys are pointers to ints; compare the pointed-to
 * values.
 */
static bool
int_compare(const void *key1, const void *key2)
{
        const int *a = key1;
        const int *b = key2;

        return *a == *b;
}
|
||||
|
||||
static int
|
||||
vir_reg_to_var(struct qreg reg)
|
||||
{
|
||||
if (reg.file == QFILE_TEMP)
|
||||
return reg.index;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Records a read of src at instruction ip: extends the temp's start/end
 * range and marks the block-level "use" bit when the block reads the
 * variable before fully defining it.
 */
static void
vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
              struct qreg src)
{
        int var = vir_reg_to_var(src);
        if (var == -1)
                return;

        c->temp_start[var] = MIN2(c->temp_start[var], ip);
        c->temp_end[var] = MAX2(c->temp_end[var], ip);

        /* The use[] bitset marks when the block makes
         * use of a variable without having completely
         * defined that variable within the block.
         */
        if (!BITSET_TEST(block->def, var))
                BITSET_SET(block->use, var);
}
|
||||
|
||||
/* Looks up (or lazily creates) the partial-update tracking state for the
 * destination temp of inst.  The hash key is a pointer into the
 * instruction's dst.index, compared by value via int_compare.  New state is
 * rzalloc'd off the hash table so it's freed with it.
 */
static struct partial_update_state *
get_partial_update_state(struct hash_table *partial_update_ht,
                         struct qinst *inst)
{
        struct hash_entry *entry =
                _mesa_hash_table_search(partial_update_ht,
                                        &inst->dst.index);
        if (entry)
                return entry->data;

        struct partial_update_state *state =
                rzalloc(partial_update_ht, struct partial_update_state);

        _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);

        return state;
}
|
||||
|
||||
/* Records a write of inst's destination at instruction ip: extends the
 * temp's start/end range, and decides whether the write "screens off" prior
 * values (a full definition) by setting the block's def bit.
 */
static void
vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
              struct hash_table *partial_update_ht, struct qinst *inst)
{
        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
                return;

        /* The def[] bitset marks when an initialization in a
         * block completely screens off previous updates of
         * that variable.
         */
        int var = vir_reg_to_var(inst->dst);
        if (var == -1)
                return;

        c->temp_start[var] = MIN2(c->temp_start[var], ip);
        c->temp_end[var] = MAX2(c->temp_end[var], ip);

        /* If we've already tracked this as a def, or already used it within
         * the block, there's nothing to do.
         */
        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
                return;

        /* Easy, common case: unconditional full register update.
         *
         * We treat conditioning on the exec mask as the same as not being
         * conditional.  This makes sure that if the register gets set on
         * either side of an if, it is treated as being screened off before
         * the if.  Otherwise, if there was no intervening def, its live
         * interval doesn't extend back to the start of he program, and if too
         * many registers did that we'd fail to register allocate.
         */
        if (((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
              inst->qpu.flags.mc == V3D_QPU_COND_NONE) ||
             inst->cond_is_exec_mask) &&
            inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
            inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
                BITSET_SET(block->def, var);
                return;
        }

        /* Finally, look at the condition code and packing and mark it as a
         * def.  We need to make sure that we understand sequences
         * instructions like:
         *
         *     mov.zs t0, t1
         *     mov.zc t0, t2
         *
         * or:
         *
         *     mmov t0.8a, t1
         *     mmov t0.8b, t2
         *     mmov t0.8c, t3
         *     mmov t0.8d, t4
         *
         * as defining the temp within the block, because otherwise dst's live
         * range will get extended up the control flow to the top of the
         * program.
         */
        struct partial_update_state *state =
                get_partial_update_state(partial_update_ht, inst);
        uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */

        if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
            inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
                state->channels |= mask;
        } else {
                for (int i = 0; i < 4; i++) {
                        if (!(mask & (1 << i)))
                                continue;

                        /* XXXif (state->insts[i] &&
                            state->insts[i]->cond ==
                            qpu_cond_complement(inst->cond))
                                state->channels |= 1 << i;
                        else
                        */
                        state->insts[i] = inst;
                }
        }

        /* All four channels covered: the temp is fully defined here. */
        if (state->channels == 0xf)
                BITSET_SET(block->def, var);
}
|
||||
|
||||
/* Invalidates partial-update tracking for conditional writers whenever the
 * flags they were conditioned on get overwritten, so a later conditional
 * write isn't wrongly paired with them.
 */
static void
sf_state_clear(struct hash_table *partial_update_ht)
{
        struct hash_entry *entry;

        hash_table_foreach(partial_update_ht, entry) {
                struct partial_update_state *state = entry->data;

                for (int i = 0; i < 4; i++) {
                        if (state->insts[i] &&
                            (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
                             state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
                                state->insts[i] = NULL;
                }
        }
}
|
||||
|
||||
/* Sets up the def/use arrays for when variables are used-before-defined or
 * defined-before-used in the block.
 *
 * Also initializes the temp_start/temp_end to cover just the instruction IPs
 * where the variable is used, which will be extended later in
 * vir_compute_start_end().
 */
static void
vir_setup_def_use(struct v3d_compile *c)
{
        struct hash_table *partial_update_ht =
                _mesa_hash_table_create(c, int_hash, int_compare);
        int ip = 0;

        vir_for_each_block(block, c) {
                block->start_ip = ip;

                /* Partial-update tracking is per-block. */
                _mesa_hash_table_clear(partial_update_ht, NULL);

                vir_for_each_inst(inst, block) {
                        for (int i = 0; i < vir_get_nsrc(inst); i++)
                                vir_setup_use(c, block, ip, inst->src[i]);

                        vir_setup_def(c, block, ip, partial_update_ht, inst);

                        if (false /* XXX inst->uf */)
                                sf_state_clear(partial_update_ht);

                        /* Payload registers: r0/1/2 contain W, centroid W,
                         * and Z at program start.  Register allocation will
                         * force their nodes to R0/1/2.
                         */
                        if (inst->src[0].file == QFILE_REG) {
                                switch (inst->src[0].index) {
                                case 0:
                                case 1:
                                case 2:
                                        /* NOTE(review): dst is assumed to be
                                         * a temp here (dst.index used without
                                         * checking dst.file) — confirm payload
                                         * reads always write temps.
                                         */
                                        c->temp_start[inst->dst.index] = 0;
                                        break;
                                }
                        }

                        ip++;
                }
                block->end_ip = ip;
        }

        _mesa_hash_table_destroy(partial_update_ht, NULL);
}
|
||||
|
||||
/* One backwards pass of the classic live-variables dataflow equations:
 *
 *     live_out(b) |= U live_in(succ)
 *     live_in(b)   = use(b) | (live_out(b) & ~def(b))
 *
 * Returns true if any bit changed, so the caller iterates to a fixed point.
 */
static bool
vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
{
        bool cont = false;

        vir_for_each_block_rev(block, c) {
                /* Update live_out: Any successor using the variable
                 * on entrance needs us to have the variable live on
                 * exit.
                 */
                vir_for_each_successor(succ, block) {
                        for (int i = 0; i < bitset_words; i++) {
                                BITSET_WORD new_live_out = (succ->live_in[i] &
                                                            ~block->live_out[i]);
                                if (new_live_out) {
                                        block->live_out[i] |= new_live_out;
                                        cont = true;
                                }
                        }
                }

                /* Update live_in */
                for (int i = 0; i < bitset_words; i++) {
                        BITSET_WORD new_live_in = (block->use[i] |
                                                   (block->live_out[i] &
                                                    ~block->def[i]));
                        if (new_live_in & ~block->live_in[i]) {
                                block->live_in[i] |= new_live_in;
                                cont = true;
                        }
                }
        }

        return cont;
}
|
||||
|
||||
/**
 * Extend the start/end ranges for each variable to account for the
 * new information calculated from control flow.
 *
 * A temp live into a block must be live from that block's first IP; a temp
 * live out must be live through the block's last IP.
 */
static void
vir_compute_start_end(struct v3d_compile *c, int num_vars)
{
        vir_for_each_block(block, c) {
                for (int i = 0; i < num_vars; i++) {
                        if (BITSET_TEST(block->live_in, i)) {
                                c->temp_start[i] = MIN2(c->temp_start[i],
                                                        block->start_ip);
                                c->temp_end[i] = MAX2(c->temp_end[i],
                                                      block->start_ip);
                        }

                        if (BITSET_TEST(block->live_out, i)) {
                                c->temp_start[i] = MIN2(c->temp_start[i],
                                                        block->end_ip);
                                c->temp_end[i] = MAX2(c->temp_end[i],
                                                      block->end_ip);
                        }
                }
        }
}
|
||||
|
||||
/* Computes c->temp_start/temp_end live intervals for every temp:
 * initializes per-block def/use/live_in/live_out bitsets, seeds them from
 * the instruction stream, iterates the dataflow equations to a fixed point,
 * then widens the intervals across control flow.
 */
void
vir_calculate_live_intervals(struct v3d_compile *c)
{
        int bitset_words = BITSET_WORDS(c->num_temps);

        /* If we called this function more than once, then we should be
         * freeing the previous arrays.
         */
        assert(!c->temp_start);

        c->temp_start = rzalloc_array(c, int, c->num_temps);
        c->temp_end = rzalloc_array(c, int, c->num_temps);

        /* Start with empty (inverted) intervals so MIN2/MAX2 updates work. */
        for (int i = 0; i < c->num_temps; i++) {
                c->temp_start[i] = MAX_INSTRUCTION;
                c->temp_end[i] = -1;
        }

        vir_for_each_block(block, c) {
                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
        }

        vir_setup_def_use(c);

        while (vir_live_variables_dataflow(c, bitset_words))
                ;

        vir_compute_start_end(c, c->num_temps);
}
|
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file v3d_vir_lower_uniforms.c
|
||||
*
|
||||
* This is the pre-code-generation pass for fixing up instructions that try to
|
||||
* read from multiple uniform values.
|
||||
*/
|
||||
|
||||
#include "v3d_compiler.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
/* Hash-table callback: keys are (uniform index + 1) stored directly in the
 * pointer, so the pointer's integer value is already a usable hash.
 */
static inline uint32_t
index_hash(const void *key)
{
        uintptr_t bits = (uintptr_t)key;

        return (uint32_t)bits;
}
|
||||
|
||||
/* Hash-table callback: keys are integer indices encoded directly in the
 * pointer value, so key equality is plain pointer equality.
 */
static inline bool
index_compare(const void *a, const void *b)
{
        return (uintptr_t)a == (uintptr_t)b;
}
|
||||
|
||||
static void
|
||||
add_uniform(struct hash_table *ht, struct qreg reg)
|
||||
{
|
||||
struct hash_entry *entry;
|
||||
void *key = (void *)(uintptr_t)(reg.index + 1);
|
||||
|
||||
entry = _mesa_hash_table_search(ht, key);
|
||||
if (entry) {
|
||||
entry->data++;
|
||||
} else {
|
||||
_mesa_hash_table_insert(ht, key, (void *)(uintptr_t)1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
remove_uniform(struct hash_table *ht, struct qreg reg)
|
||||
{
|
||||
struct hash_entry *entry;
|
||||
void *key = (void *)(uintptr_t)(reg.index + 1);
|
||||
|
||||
entry = _mesa_hash_table_search(ht, key);
|
||||
assert(entry);
|
||||
entry->data--;
|
||||
if (entry->data == NULL)
|
||||
_mesa_hash_table_remove(ht, entry);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_lowerable_uniform(struct qinst *inst, int i)
|
||||
{
|
||||
if (inst->src[i].file != QFILE_UNIF)
|
||||
return false;
|
||||
if (vir_has_implicit_uniform(inst))
|
||||
return i != vir_get_implicit_uniform_src(inst);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Returns the number of different uniform values referenced by the
|
||||
* instruction.
|
||||
*/
|
||||
static uint32_t
|
||||
vir_get_instruction_uniform_count(struct qinst *inst)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
if (inst->src[i].file != QFILE_UNIF)
|
||||
continue;
|
||||
|
||||
bool is_duplicate = false;
|
||||
for (int j = 0; j < i; j++) {
|
||||
if (inst->src[j].file == QFILE_UNIF &&
|
||||
inst->src[j].index == inst->src[i].index) {
|
||||
is_duplicate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!is_duplicate)
|
||||
count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Rewrites instructions that read more than one distinct uniform value
 * so that at most one uniform read remains per instruction, moving the
 * other uniforms into temps via MOVs inserted at the top of each block.
 * Works greedily on the most-referenced uniform until no instruction
 * needs lowering.
 */
void
vir_lower_uniforms(struct v3d_compile *c)
{
        struct hash_table *ht =
                _mesa_hash_table_create(c, index_hash, index_compare);

        /* Walk the instruction list, finding which instructions have more
         * than one uniform referenced, and add those uniform values to the
         * ht.
         */
        vir_for_each_inst_inorder(inst, c) {
                uint32_t nsrc = vir_get_nsrc(inst);

                if (vir_get_instruction_uniform_count(inst) <= 1)
                        continue;

                for (int i = 0; i < nsrc; i++) {
                        if (is_lowerable_uniform(inst, i))
                                add_uniform(ht, inst->src[i]);
                }
        }

        /* Each iteration lowers one uniform; its uses are removed from the
         * table, so the table shrinking to empty terminates the loop.
         */
        while (ht->entries) {
                /* Find the most commonly used uniform in instructions that
                 * need a uniform lowered.
                 */
                uint32_t max_count = 0;
                uint32_t max_index = 0;
                struct hash_entry *entry;
                hash_table_foreach(ht, entry) {
                        uint32_t count = (uintptr_t)entry->data;
                        uint32_t index = (uintptr_t)entry->key - 1;
                        if (count > max_count) {
                                max_count = count;
                                max_index = index;
                        }
                }

                struct qreg unif = vir_reg(QFILE_UNIF, max_index);

                /* Now, find the instructions using this uniform and make them
                 * reference a temp instead.
                 */
                vir_for_each_block(block, c) {
                        struct qinst *mov = NULL;

                        vir_for_each_inst(inst, block) {
                                uint32_t nsrc = vir_get_nsrc(inst);

                                uint32_t count = vir_get_instruction_uniform_count(inst);

                                if (count <= 1)
                                        continue;

                                /* If the block doesn't have a load of the
                                 * uniform yet, add it.  We could potentially
                                 * do better and CSE MOVs from multiple blocks
                                 * into dominating blocks, except that may
                                 * cause troubles for register allocation.
                                 */
                                if (!mov) {
                                        mov = vir_mul_inst(V3D_QPU_M_MOV,
                                                           vir_get_temp(c),
                                                           unif, c->undef);
                                        /* Insert at the head of the block so
                                         * the temp is defined before any use.
                                         */
                                        list_add(&mov->link,
                                                 &block->instructions);
                                        c->defs[mov->dst.index] = mov;
                                }

                                bool removed = false;
                                for (int i = 0; i < nsrc; i++) {
                                        if (is_lowerable_uniform(inst, i) &&
                                            inst->src[i].index == max_index) {
                                                inst->src[i].file =
                                                        mov->dst.file;
                                                inst->src[i].index =
                                                        mov->dst.index;
                                                remove_uniform(ht, unif);
                                                removed = true;
                                        }
                                }
                                if (removed)
                                        count--;

                                /* If the instruction doesn't need lowering any more,
                                 * then drop it from the list.
                                 */
                                if (count <= 1) {
                                        for (int i = 0; i < nsrc; i++) {
                                                if (is_lowerable_uniform(inst, i))
                                                        remove_uniform(ht, inst->src[i]);
                                        }
                                }
                        }
                }
        }

        _mesa_hash_table_destroy(ht, NULL);
}
|
|
@ -0,0 +1,233 @@
|
|||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file v3d_opt_copy_propagation.c
|
||||
*
|
||||
* This implements simple copy propagation for VIR without control flow.
|
||||
*
|
||||
* For each temp, it keeps a qreg of which source it was MOVed from, if it
|
||||
* was. If we see that used later, we can just reuse the source value, since
|
||||
* we know we don't have control flow, and we have SSA for our values so
|
||||
* there's no killing to worry about.
|
||||
*/
|
||||
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
/* Returns whether @inst is a plain mul-ALU MOV/FMOV of a temp or uniform
 * into a temp that is safe to copy propagate from: no output pack, no
 * condition codes.
 */
static bool
is_copy_mov(struct qinst *inst)
{
        if (!inst)
                return false;

        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
                return false;
        }

        if (inst->dst.file != QFILE_TEMP)
                return false;

        if (inst->src[0].file != QFILE_TEMP &&
            inst->src[0].file != QFILE_UNIF) {
                return false;
        }

        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
                return false;
        }

        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
                return false;
        }

        /* NOTE(review): the check above already restricts src[0].file to
         * QFILE_TEMP/QFILE_UNIF, so the QFILE_MAGIC and QFILE_REG cases
         * below look unreachable -- presumably inherited from the vc4
         * backend; confirm before relying on them.
         */
        switch (inst->src[0].file) {
        case QFILE_MAGIC:
                /* No copy propagating from R3/R4/R5 -- the MOVs from those
                 * are there to register allocate values produced into R3/4/5
                 * to other regs (though hopefully r3/4/5).
                 */
                switch (inst->src[0].index) {
                case V3D_QPU_WADDR_R3:
                case V3D_QPU_WADDR_R4:
                case V3D_QPU_WADDR_R5:
                        return false;
                default:
                        break;
                }
                break;

        case QFILE_REG:
                switch (inst->src[0].index) {
                case 0:
                case 1:
                case 2:
                        /* MOVs from rf0/1/2 are only to track the live
                         * intervals for W/centroid W/Z.
                         */
                        return false;
                }
                break;

        default:
                break;
        }

        return true;
}
|
||||
|
||||
static bool
|
||||
vir_has_unpack(struct qinst *inst, int chan)
|
||||
{
|
||||
assert(chan == 0 || chan == 1);
|
||||
|
||||
if (vir_is_add(inst)) {
|
||||
if (chan == 0)
|
||||
return inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE;
|
||||
else
|
||||
return inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE;
|
||||
} else {
|
||||
if (chan == 0)
|
||||
return inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE;
|
||||
else
|
||||
return inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Tries to rewrite each temp source of @inst to read the source of a
 * copy MOV defining it, using either the per-block @movs table or a
 * global SSA def.  Returns true if any source was replaced.
 */
static bool
try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
{
        bool debug = false;
        bool progress = false;

        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                if (inst->src[i].file != QFILE_TEMP)
                        continue;

                /* We have two ways of finding MOVs we can copy propagate
                 * from.  One is if it's an SSA def: then we can reuse it from
                 * any block in the program, as long as its source is also an
                 * SSA def.  Alternatively, if it's in the "movs" array
                 * tracked within the block, then we know the sources for it
                 * haven't been changed since we saw the instruction within
                 * our block.
                 */
                struct qinst *mov = movs[inst->src[i].index];
                if (!mov) {
                        if (!is_copy_mov(c->defs[inst->src[i].index]))
                                continue;
                        mov = c->defs[inst->src[i].index];

                        if (mov->src[0].file == QFILE_TEMP &&
                            !c->defs[mov->src[0].index])
                                continue;
                }

                if (vir_has_unpack(mov, 0)) {
                        /* Make sure that the meaning of the unpack
                         * would be the same between the two
                         * instructions.
                         */
                        if (vir_is_float_input(inst) !=
                            vir_is_float_input(mov)) {
                                continue;
                        }
                        /* No composing the unpacks. */
                        if (vir_has_unpack(inst, i))
                                continue;
                }

                if (debug) {
                        fprintf(stderr, "Copy propagate: ");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                }

                inst->src[i] = mov->src[0];
                if (vir_has_unpack(mov, 0)) {
                        /* is_copy_mov() only admits mul-ALU MOV/FMOV, so the
                         * mov's unpack lives in the mul "a" slot.
                         */
                        enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a_unpack;

                        vir_set_unpack(inst, i, unpack);
                }

                if (debug) {
                        fprintf(stderr, "to: ");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                }

                progress = true;
        }

        return progress;
}
|
||||
|
||||
static void
|
||||
apply_kills(struct v3d_compile *c, struct qinst **movs, struct qinst *inst)
|
||||
{
|
||||
if (inst->dst.file != QFILE_TEMP)
|
||||
return;
|
||||
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (movs[i] &&
|
||||
(movs[i]->dst.index == inst->dst.index ||
|
||||
(movs[i]->src[0].file == QFILE_TEMP &&
|
||||
movs[i]->src[0].index == inst->dst.index))) {
|
||||
movs[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
vir_opt_copy_propagate(struct v3d_compile *c)
|
||||
{
|
||||
bool progress = false;
|
||||
struct qinst **movs;
|
||||
|
||||
movs = ralloc_array(c, struct qinst *, c->num_temps);
|
||||
if (!movs)
|
||||
return false;
|
||||
|
||||
vir_for_each_block(block, c) {
|
||||
/* The MOVs array tracks only available movs within the
|
||||
* block.
|
||||
*/
|
||||
memset(movs, 0, sizeof(struct qinst *) * c->num_temps);
|
||||
|
||||
vir_for_each_inst(inst, block) {
|
||||
progress = try_copy_prop(c, inst, movs) || progress;
|
||||
|
||||
apply_kills(c, movs, inst);
|
||||
|
||||
if (is_copy_mov(inst))
|
||||
movs[inst->dst.index] = inst;
|
||||
}
|
||||
}
|
||||
|
||||
ralloc_free(movs);
|
||||
|
||||
return progress;
|
||||
}
|
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file v3d_opt_dead_code.c
|
||||
*
|
||||
* This is a simple dead code eliminator for SSA values in VIR.
|
||||
*
|
||||
* It walks all the instructions finding what temps are used, then walks again
|
||||
* to remove instructions writing unused temps.
|
||||
*
|
||||
* This is an inefficient implementation if you have long chains of
|
||||
* instructions where the entire chain is dead, but we expect those to have
|
||||
* been eliminated at the NIR level, and here we're just cleaning up small
|
||||
* problems produced by NIR->VIR.
|
||||
*/
|
||||
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
static bool debug;
|
||||
|
||||
/* Removes a dead instruction from the program, asserting that it doesn't
 * update the condition flags (which would be an observable side effect
 * this pass must not delete).
 */
static void
dce(struct v3d_compile *c, struct qinst *inst)
{
        if (debug) {
                fprintf(stderr, "Removing: ");
                vir_dump_inst(c, inst);
                fprintf(stderr, "\n");
        }
        assert(inst->qpu.flags.apf == V3D_QPU_PF_NONE);
        assert(inst->qpu.flags.mpf == V3D_QPU_PF_NONE);
        vir_remove_instruction(c, inst);
}
|
||||
|
||||
static bool
|
||||
has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
|
||||
{
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
if (inst->src[i].file == QFILE_VPM) {
|
||||
/* Instance ID, Vertex ID: Should have been removed at
|
||||
* the NIR level
|
||||
*/
|
||||
if (inst->src[i].index == ~0)
|
||||
return true;
|
||||
|
||||
uint32_t attr = inst->src[i].index / 4;
|
||||
uint32_t offset = inst->src[i].index % 4;
|
||||
|
||||
if (c->vattr_sizes[attr] != offset)
|
||||
return true;
|
||||
|
||||
/* Can't get rid of the last VPM read, or the
|
||||
* simulator (at least) throws an error.
|
||||
*/
|
||||
uint32_t total_size = 0;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++)
|
||||
total_size += c->vattr_sizes[i];
|
||||
if (total_size == 1)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Dead code removal of varyings is tricky, so just assert
|
||||
* that it all happened at the NIR level.
|
||||
*/
|
||||
if (inst->src[i].file == QFILE_VARY)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
vir_opt_dead_code(struct v3d_compile *c)
|
||||
{
|
||||
bool progress = false;
|
||||
bool *used = calloc(c->num_temps, sizeof(bool));
|
||||
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
if (inst->src[i].file == QFILE_TEMP)
|
||||
used[inst->src[i].index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
vir_for_each_block(block, c) {
|
||||
vir_for_each_inst_safe(inst, block) {
|
||||
if (inst->dst.file != QFILE_NULL &&
|
||||
!(inst->dst.file == QFILE_TEMP &&
|
||||
!used[inst->dst.index])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (vir_has_side_effects(c, inst))
|
||||
continue;
|
||||
|
||||
if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
|
||||
inst->qpu.flags.mpf != V3D_QPU_PF_NONE||
|
||||
has_nonremovable_reads(c, inst)) {
|
||||
/* If we can't remove the instruction, but we
|
||||
* don't need its destination value, just
|
||||
* remove the destination. The register
|
||||
* allocator would trivially color it and it
|
||||
* wouldn't cause any register pressure, but
|
||||
* it's nicer to read the VIR code without
|
||||
* unused destination regs.
|
||||
*/
|
||||
if (inst->dst.file == QFILE_TEMP) {
|
||||
if (debug) {
|
||||
fprintf(stderr,
|
||||
"Removing dst from: ");
|
||||
vir_dump_inst(c, inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
c->defs[inst->dst.index] = NULL;
|
||||
inst->dst.file = QFILE_NULL;
|
||||
progress = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
if (inst->src[i].file != QFILE_VPM)
|
||||
continue;
|
||||
uint32_t attr = inst->src[i].index / 4;
|
||||
uint32_t offset = (inst->src[i].index % 4);
|
||||
|
||||
if (c->vattr_sizes[attr] == offset) {
|
||||
c->num_inputs--;
|
||||
c->vattr_sizes[attr]--;
|
||||
}
|
||||
}
|
||||
|
||||
dce(c, inst);
|
||||
progress = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
free(used);
|
||||
|
||||
return progress;
|
||||
}
|
|
@ -0,0 +1,254 @@
|
|||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/register_allocate.h"
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
#define QPU_R(i) { .magic = false, .index = i }
|
||||
|
||||
#define ACC_INDEX 0
|
||||
#define ACC_COUNT 5
|
||||
#define PHYS_INDEX (ACC_INDEX + ACC_COUNT)
|
||||
#define PHYS_COUNT 64
|
||||
|
||||
/* Builds the compiler-wide register-allocation set: one register class
 * per fragment-shader threading mode, each containing the accumulators
 * plus the slice of the physical register file available at that thread
 * count.  Returns false on allocation failure.
 */
bool
vir_init_reg_sets(struct v3d_compiler *compiler)
{
        compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
                                          true);
        if (!compiler->regs)
                return false;

        /* Allocate 3 regfile classes, for the ways the physical register file
         * can be divided up for fragment shader threading.
         */
        for (int threads = 0; threads < 3; threads++) {
                compiler->reg_class[threads] =
                        ra_alloc_reg_class(compiler->regs);

                /* Each extra thread halves the physical registers available
                 * per thread (PHYS_COUNT >> threads).
                 */
                for (int i = PHYS_INDEX;
                     i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
                        ra_class_add_reg(compiler->regs,
                                         compiler->reg_class[threads], i);
                }

                /* The accumulators are available in every class. */
                for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT; i++) {
                        ra_class_add_reg(compiler->regs,
                                         compiler->reg_class[threads], i);
                }
        }

        ra_set_finalize(compiler->regs, NULL);

        return true;
}
|
||||
|
||||
/* Maps an RA graph node back to its temp, carrying the sort priority
 * (live-range length) used to order nodes.
 */
struct node_to_temp_map {
        uint32_t temp;
        uint32_t priority;
};

/* qsort comparator ordering entries by ascending priority.
 *
 * Uses explicit comparisons instead of "a->priority - b->priority":
 * the fields are uint32_t, so the subtraction wraps when the values
 * differ by more than INT_MAX and would give qsort an inconsistent
 * ordering.
 */
static int
node_to_temp_priority(const void *in_a, const void *in_b)
{
        const struct node_to_temp_map *a = in_a;
        const struct node_to_temp_map *b = in_b;

        if (a->priority < b->priority)
                return -1;
        if (a->priority > b->priority)
                return 1;
        return 0;
}
|
||||
|
||||
#define CLASS_BIT_PHYS (1 << 0)
|
||||
#define CLASS_BIT_R0_R2 (1 << 1)
|
||||
#define CLASS_BIT_R3 (1 << 2)
|
||||
#define CLASS_BIT_R4 (1 << 3)
|
||||
|
||||
/**
 * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
 *
 * The return value should be freed by the caller.  Returns NULL (and
 * sets c->failed) if allocation fails.
 */
struct qpu_reg *
v3d_register_allocate(struct v3d_compile *c)
{
        struct node_to_temp_map map[c->num_temps];
        uint32_t temp_to_node[c->num_temps];
        uint8_t class_bits[c->num_temps];
        struct qpu_reg *temp_registers = calloc(c->num_temps,
                                                sizeof(*temp_registers));
        int acc_nodes[ACC_COUNT];

        struct ra_graph *g = ra_alloc_interference_graph(c->compiler->regs,
                                                         c->num_temps +
                                                         ARRAY_SIZE(acc_nodes));

        /* Make some fixed nodes for the accumulators, which we will need to
         * interfere with when ops have implied r3/r4 writes or for the thread
         * switches.  We could represent these as classes for the nodes to
         * live in, but the classes take up a lot of memory to set up, so we
         * don't want to make too many.
         */
        for (int i = 0; i < ARRAY_SIZE(acc_nodes); i++) {
                acc_nodes[i] = c->num_temps + i;
                ra_set_node_reg(g, acc_nodes[i], ACC_INDEX + i);
        }

        /* Compute the live ranges so we can figure out interference. */
        vir_calculate_live_intervals(c);

        /* Build a temp<->node mapping ordered by live-range length, so
         * node numbering (and thus allocation order) is deterministic.
         */
        for (uint32_t i = 0; i < c->num_temps; i++) {
                map[i].temp = i;
                map[i].priority = c->temp_end[i] - c->temp_start[i];
        }
        qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority);
        for (uint32_t i = 0; i < c->num_temps; i++) {
                temp_to_node[map[i].temp] = i;
        }

        /* Figure out our register classes and preallocated registers.  We
         * start with any temp being able to be in any file, then instructions
         * incrementally remove bits that the temp definitely can't be in.
         */
        /* NOTE(review): class_bits is currently only consumed by the
         * disabled THRSW handling below.
         */
        memset(class_bits,
               CLASS_BIT_PHYS | CLASS_BIT_R0_R2 | CLASS_BIT_R3 | CLASS_BIT_R4,
               sizeof(class_bits));

        int ip = 0;
        vir_for_each_inst_inorder(inst, c) {
                /* If the instruction writes r3/r4 (and optionally moves its
                 * result to a temp), nothing else can be stored in r3/r4 across
                 * it.
                 */
                if (vir_writes_r3(inst)) {
                        for (int i = 0; i < c->num_temps; i++) {
                                if (c->temp_start[i] < ip &&
                                    c->temp_end[i] > ip) {
                                        ra_add_node_interference(g,
                                                                 temp_to_node[i],
                                                                 acc_nodes[3]);
                                }
                        }
                }
                if (vir_writes_r4(inst)) {
                        for (int i = 0; i < c->num_temps; i++) {
                                if (c->temp_start[i] < ip &&
                                    c->temp_end[i] > ip) {
                                        ra_add_node_interference(g,
                                                                 temp_to_node[i],
                                                                 acc_nodes[4]);
                                }
                        }
                }

                if (inst->src[0].file == QFILE_REG) {
                        switch (inst->src[0].index) {
                        case 0:
                        case 1:
                        case 2:
                                /* Payload setup instructions: Force allocate
                                 * the dst to the given register (so the MOV
                                 * will disappear).
                                 */
                                assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV);
                                assert(inst->dst.file == QFILE_TEMP);
                                ra_set_node_reg(g,
                                                temp_to_node[inst->dst.index],
                                                PHYS_INDEX +
                                                inst->src[0].index);
                                break;
                        }
                }

#if 0
                switch (inst->op) {
                case QOP_THRSW:
                        /* All accumulators are invalidated across a thread
                         * switch.
                         */
                        for (int i = 0; i < c->num_temps; i++) {
                                if (c->temp_start[i] < ip && c->temp_end[i] > ip)
                                        class_bits[i] &= ~(CLASS_BIT_R0_R3 |
                                                           CLASS_BIT_R4);
                        }
                        break;

                default:
                        break;
                }
#endif

                ip++;
        }

        for (uint32_t i = 0; i < c->num_temps; i++) {
                ra_set_node_class(g, temp_to_node[i],
                                  c->compiler->reg_class[c->fs_threaded]);
        }

        /* All pairs of temps with overlapping live ranges interfere. */
        for (uint32_t i = 0; i < c->num_temps; i++) {
                for (uint32_t j = i + 1; j < c->num_temps; j++) {
                        if (!(c->temp_start[i] >= c->temp_end[j] ||
                              c->temp_start[j] >= c->temp_end[i])) {
                                ra_add_node_interference(g,
                                                         temp_to_node[i],
                                                         temp_to_node[j]);
                        }
                }
        }

        bool ok = ra_allocate(g);
        if (!ok) {
                /* In threaded mode the caller may retry with fewer threads,
                 * so only dump when there's no fallback left.
                 */
                if (!c->fs_threaded) {
                        fprintf(stderr, "Failed to register allocate:\n");
                        vir_dump(c);
                }

                c->failed = true;
                free(temp_registers);
                return NULL;
        }

        for (uint32_t i = 0; i < c->num_temps; i++) {
                int ra_reg = ra_get_node_reg(g, temp_to_node[i]);
                if (ra_reg < PHYS_INDEX) {
                        /* Registers below PHYS_INDEX are the accumulators,
                         * addressed as magic waddrs.
                         */
                        temp_registers[i].magic = true;
                        temp_registers[i].index = (V3D_QPU_WADDR_R0 +
                                                   ra_reg - ACC_INDEX);
                } else {
                        temp_registers[i].magic = false;
                        temp_registers[i].index = ra_reg - PHYS_INDEX;
                }

                /* If the value's never used, just write to the NOP register
                 * for clarity in debug output.
                 */
                if (c->temp_start[i] == c->temp_end[i]) {
                        temp_registers[i].magic = true;
                        temp_registers[i].index = V3D_QPU_WADDR_NOP;
                }
        }

        ralloc_free(g);

        return temp_registers;
}
|
|
@ -0,0 +1,359 @@
|
|||
/*
|
||||
* Copyright © 2016 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler/v3d_compiler.h"
|
||||
#include "qpu/qpu_instr.h"
|
||||
#include "qpu/qpu_disasm.h"
|
||||
|
||||
static inline struct qpu_reg
|
||||
qpu_reg(int index)
|
||||
{
|
||||
struct qpu_reg reg = {
|
||||
.magic = false,
|
||||
.index = index,
|
||||
};
|
||||
return reg;
|
||||
}
|
||||
|
||||
static inline struct qpu_reg
|
||||
qpu_magic(enum v3d_qpu_waddr waddr)
|
||||
{
|
||||
struct qpu_reg reg = {
|
||||
.magic = true,
|
||||
.index = waddr,
|
||||
};
|
||||
return reg;
|
||||
}
|
||||
|
||||
static inline struct qpu_reg
|
||||
qpu_acc(int acc)
|
||||
{
|
||||
return qpu_magic(V3D_QPU_WADDR_R0 + acc);
|
||||
}
|
||||
|
||||
/* Returns a QPU instruction with both ALU halves set to NOP, each doing
 * a magic write to the NOP address (i.e. a full no-op encoding).
 */
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}
|
||||
|
||||
/* Allocates a VIR NOP instruction (add-ALU NOP with null dst/srcs),
 * used below as a carrier for signal bits like ldunif/ldvary/ldvpm.
 */
static struct qinst *
vir_nop(void)
{
        struct qreg undef = { QFILE_NULL, 0 };
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}
|
||||
|
||||
/* Inserts a fresh NOP instruction immediately before @inst in its list
 * and returns it.
 */
static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        /* list_addtail() relative to inst's own link places q just
         * before inst.
         */
        list_addtail(&q->link, &inst->link);

        return q;
}
|
||||
|
||||
/* Inserts a ldunif-signaled NOP before @inst to load the uniform read
 * by src @i (the caller then reads the result from r5/acc 5).
 */
static void
new_ldunif_instr(struct qinst *inst, int i)
{
        struct qinst *ldunif = new_qpu_nop_before(inst);

        ldunif->qpu.sig.ldunif = true;
        assert(inst->src[i].file == QFILE_UNIF);
        /* Record which uniform stream entry this ldunif consumes. */
        ldunif->uniform = inst->src[i].index;
}
|
||||
|
||||
/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.magic) {
                /* Accumulators are read through dedicated mux values rather
                 * than a raddr slot.
                 */
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

        /* Use raddr A if nothing reads through it yet; otherwise reuse A if
         * it already holds this index, else fall back to raddr B -- asserting
         * we don't need a third distinct register-file read.
         */
        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}
|
||||
|
||||
/* Translates one block's VIR instructions in place into their final QPU
 * encodings: maps temps through the register allocation, fills in the
 * raddr/waddr/mux fields, and inserts ldunif/ldvary/ldvpm signal NOPs
 * for sideband sources.
 */
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        int last_vpm_read_index = -1;

        vir_for_each_inst(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_implicit_uniform(qinst)) {
                        int src = vir_get_implicit_uniform_src(qinst);
                        assert(qinst->src[src].file == QFILE_UNIF);
                        qinst->uniform = qinst->src[src].index;
                        c->num_uniforms++;
                }

                int nsrc = vir_get_non_sideband_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                bool emitted_ldunif = false;
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                        case QFILE_LOAD_IMM:
                                src[i] = qpu_acc(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_UNIF:
                                /* One ldunif serves all uniform reads of
                                 * this instruction; its result is in acc 5.
                                 */
                                if (!emitted_ldunif) {
                                        new_ldunif_instr(qinst, i);
                                        c->num_uniforms++;
                                        emitted_ldunif = true;
                                }

                                src[i] = qpu_acc(5);
                                break;
                        case QFILE_VARY:
                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvary = true;

                                src[i] = qpu_acc(3);
                                break;
                        case QFILE_SMALL_IMM:
                                abort(); /* XXX */
#if 0
                                src[i].mux = QPU_MUX_SMALL_IMM;
                                src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
                                /* This should only have returned a valid
                                 * small immediate field, not ~0 for failure.
                                 */
                                assert(src[i].addr <= 47);
#endif
                                break;

                        case QFILE_VPM:
                                /* VPM reads must occur in increasing order. */
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_acc(3);
                                break;

                        case QFILE_TLB:
                        case QFILE_TLBU:
                                unreachable("bad vir src file");
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_TLB:
                        dst = qpu_magic(V3D_QPU_WADDR_TLB);
                        break;

                case QFILE_TLBU:
                        dst = qpu_magic(V3D_QPU_WADDR_TLBU);
                        break;

                case QFILE_VARY:
                case QFILE_UNIF:
                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                /* A VIR instruction uses only one ALU half. */
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}
|
||||
|
||||
|
||||
static void
|
||||
v3d_dump_qpu(struct v3d_compile *c)
|
||||
{
|
||||
fprintf(stderr, "%s prog %d/%d QPU:\n",
|
||||
vir_get_stage_name(c),
|
||||
c->program_id, c->variant_id);
|
||||
|
||||
for (int i = 0; i < c->qpu_inst_count; i++) {
|
||||
const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
|
||||
fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
void
|
||||
v3d_vir_to_qpu(struct v3d_compile *c)
|
||||
{
|
||||
struct qpu_reg *temp_registers = v3d_register_allocate(c);
|
||||
struct qblock *end_block = list_last_entry(&c->blocks,
|
||||
struct qblock, link);
|
||||
|
||||
/* Reset the uniform count to how many will be actually loaded by the
|
||||
* generated QPU code.
|
||||
*/
|
||||
c->num_uniforms = 0;
|
||||
|
||||
vir_for_each_block(block, c)
|
||||
v3d_generate_code_block(c, block, temp_registers);
|
||||
|
||||
struct qinst *thrsw = vir_nop();
|
||||
list_addtail(&thrsw->link, &end_block->instructions);
|
||||
thrsw->qpu.sig.thrsw = true;
|
||||
|
||||
uint32_t cycles = v3d_qpu_schedule_instructions(c);
|
||||
|
||||
c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
|
||||
int i = 0;
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
|
||||
&c->qpu_insts[i++]);
|
||||
assert(ok); (void) ok;
|
||||
}
|
||||
assert(i == c->qpu_inst_count);
|
||||
|
||||
if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
|
||||
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
|
||||
vir_get_stage_name(c),
|
||||
c->program_id, c->variant_id,
|
||||
cycles);
|
||||
}
|
||||
|
||||
if (V3D_DEBUG & (V3D_DEBUG_QPU |
|
||||
v3d_debug_flag_for_shader_stage(c->s->stage))) {
|
||||
v3d_dump_qpu(c);
|
||||
}
|
||||
|
||||
qpu_validate(c);
|
||||
|
||||
free(temp_registers);
|
||||
}
|
Loading…
Reference in New Issue