i965/vs: Replace brw_vs_emit.c with dumping code into the vec4_visitor.
Rather than having two separate backends, just create a small layer that translates the subset of Mesa IR used for ARB_vertex_program and fixed function programs to the Vec4 IR. This allows us to use the same optimization passes, code generator, register allocator as for GLSL. v2: Incorporate Eric's review comments. - Fix use of uninitialized src_swiz[] values in the SWIZZLE_ZERO/ONE case: just initialize it to 0 (.x) since the value doesn't matter (those channels get writemasked out anyway). - Properly reswizzle source register's swizzles, rather than overwriting the swizzle. - Port the old brw_vs_emit code for computing .x of the EXP2 opcode. - Update comments, removing mention of NV_vertex_program, etc. - Delete remaining #warning lines and debug comments. Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
1f0093720d
commit
66c8473e02
|
@ -128,6 +128,7 @@ i965_CXX_FILES = \
|
|||
brw_vec4_live_variables.cpp \
|
||||
brw_vec4_reg_allocate.cpp \
|
||||
brw_vec4_visitor.cpp \
|
||||
brw_vec4_vp.cpp \
|
||||
gen6_blorp.cpp \
|
||||
gen7_blorp.cpp
|
||||
|
||||
|
|
|
@ -188,7 +188,7 @@ public:
|
|||
/** @{
|
||||
* Annotation for the generated IR. One of the two can be set.
|
||||
*/
|
||||
ir_instruction *ir;
|
||||
const void *ir;
|
||||
const char *annotation;
|
||||
|
||||
bool is_tex();
|
||||
|
@ -223,7 +223,7 @@ public:
|
|||
* GLSL IR currently being processed, which is associated with our
|
||||
* driver IR instructions for debugging purposes.
|
||||
*/
|
||||
ir_instruction *base_ir;
|
||||
const void *base_ir;
|
||||
const char *current_annotation;
|
||||
|
||||
int *virtual_grf_sizes;
|
||||
|
@ -235,6 +235,9 @@ public:
|
|||
int *virtual_grf_use;
|
||||
dst_reg userplane[MAX_CLIP_PLANES];
|
||||
|
||||
src_reg *vp_temp_regs;
|
||||
src_reg vp_addr_reg;
|
||||
|
||||
/**
|
||||
* This is the size to be used for an array with an element per
|
||||
* reg_offset
|
||||
|
@ -249,6 +252,8 @@ public:
|
|||
|
||||
void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
|
||||
|
||||
bool need_all_constants_in_pull_buffer;
|
||||
|
||||
src_reg src_reg_for_float(float val);
|
||||
|
||||
/**
|
||||
|
@ -369,6 +374,13 @@ public:
|
|||
/** Walks an exec_list of ir_instruction and sends it through this visitor. */
|
||||
void visit_instructions(const exec_list *list);
|
||||
|
||||
void setup_vp_regs();
|
||||
void emit_vertex_program_code();
|
||||
void emit_vp_sop(uint32_t condmod, dst_reg dst,
|
||||
src_reg src0, src_reg src1, src_reg one);
|
||||
dst_reg get_vp_dst_reg(const prog_dst_register &dst);
|
||||
src_reg get_vp_src_reg(const prog_src_register &src);
|
||||
|
||||
void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate);
|
||||
void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
|
||||
void emit_if_gen6(ir_if *ir);
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
extern "C" {
|
||||
#include "brw_eu.h"
|
||||
#include "main/macros.h"
|
||||
#include "program/prog_print.h"
|
||||
#include "program/prog_parameter.h"
|
||||
};
|
||||
|
||||
using namespace brw;
|
||||
|
@ -774,13 +776,17 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
|
|||
bool
|
||||
vec4_visitor::run()
|
||||
{
|
||||
if (c->key.userclip_active && !c->key.uses_clip_distance)
|
||||
setup_uniform_clipplane_values();
|
||||
|
||||
/* Generate VS IR for main(). (the visitor only descends into
|
||||
* functions called "main").
|
||||
*/
|
||||
visit_instructions(shader->ir);
|
||||
if (shader) {
|
||||
visit_instructions(shader->ir);
|
||||
} else {
|
||||
emit_vertex_program_code();
|
||||
}
|
||||
|
||||
if (c->key.userclip_active && !c->key.uses_clip_distance)
|
||||
setup_uniform_clipplane_values();
|
||||
|
||||
emit_urb_writes();
|
||||
|
||||
|
@ -790,8 +796,18 @@ vec4_visitor::run()
|
|||
* that we have reladdr computations available for CSE, since we'll
|
||||
* often do repeated subexpressions for those.
|
||||
*/
|
||||
move_grf_array_access_to_scratch();
|
||||
move_uniform_array_access_to_pull_constants();
|
||||
if (shader) {
|
||||
move_grf_array_access_to_scratch();
|
||||
move_uniform_array_access_to_pull_constants();
|
||||
} else {
|
||||
/* The ARB_vertex_program frontend emits pull constant loads directly
|
||||
* rather than using reladdr, so we don't need to walk through all the
|
||||
* instructions looking for things to move. There isn't anything.
|
||||
*
|
||||
* We do still need to split things to vec4 size.
|
||||
*/
|
||||
split_uniform_registers();
|
||||
}
|
||||
pack_uniform_registers();
|
||||
move_push_constants_to_pull_constants();
|
||||
split_virtual_grfs();
|
||||
|
@ -844,10 +860,14 @@ vec4_visitor::generate_code()
|
|||
{
|
||||
int last_native_insn_offset = 0;
|
||||
const char *last_annotation_string = NULL;
|
||||
ir_instruction *last_annotation_ir = NULL;
|
||||
const void *last_annotation_ir = NULL;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
printf("Native code for vertex shader %d:\n", prog->Name);
|
||||
if (shader) {
|
||||
printf("Native code for vertex shader %d:\n", prog->Name);
|
||||
} else {
|
||||
printf("Native code for vertex program %d:\n", c->vp->program.Base.Id);
|
||||
}
|
||||
}
|
||||
|
||||
foreach_list(node, &this->instructions) {
|
||||
|
@ -859,7 +879,15 @@ vec4_visitor::generate_code()
|
|||
last_annotation_ir = inst->ir;
|
||||
if (last_annotation_ir) {
|
||||
printf(" ");
|
||||
last_annotation_ir->print();
|
||||
if (shader) {
|
||||
((ir_instruction *) last_annotation_ir)->print();
|
||||
} else {
|
||||
const prog_instruction *vpi;
|
||||
vpi = (const prog_instruction *) inst->ir;
|
||||
printf("%d: ", (int)(vpi - vp->Base.Instructions));
|
||||
_mesa_fprint_instruction_opt(stdout, vpi, 0,
|
||||
PROG_PRINT_DEBUG, NULL);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
@ -1032,24 +1060,26 @@ brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
|
|||
bool start_busy = false;
|
||||
float start_time = 0;
|
||||
|
||||
if (!prog)
|
||||
return false;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
|
||||
start_busy = (intel->batch.last_bo &&
|
||||
drm_intel_bo_busy(intel->batch.last_bo));
|
||||
start_time = get_time();
|
||||
}
|
||||
|
||||
struct brw_shader *shader =
|
||||
(brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
|
||||
if (!shader)
|
||||
return false;
|
||||
struct brw_shader *shader = NULL;
|
||||
if (prog)
|
||||
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
printf("GLSL IR for native vertex shader %d:\n", prog->Name);
|
||||
_mesa_print_ir(shader->ir, NULL);
|
||||
printf("\n\n");
|
||||
if (shader) {
|
||||
printf("GLSL IR for native vertex shader %d:\n", prog->Name);
|
||||
_mesa_print_ir(shader->ir, NULL);
|
||||
printf("\n\n");
|
||||
} else {
|
||||
printf("ARB_vertex_program %d for native vertex shader\n",
|
||||
c->vp->program.Base.Id);
|
||||
_mesa_print_program(&c->vp->program.Base);
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) {
|
||||
|
|
|
@ -2701,8 +2701,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
|
|||
this->current_annotation = NULL;
|
||||
|
||||
this->c = c;
|
||||
this->vp = (struct gl_vertex_program *)
|
||||
prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
|
||||
this->vp = &c->vp->program;
|
||||
this->prog_data = &c->prog_data;
|
||||
|
||||
this->variable_ht = hash_table_ctor(0,
|
||||
|
|
|
@ -0,0 +1,667 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** @file brw_vec4_vp.cpp
|
||||
*
|
||||
* A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
|
||||
* ARB_vertex_program and fixed-function vertex processing.
|
||||
*/
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_vec4.h"
|
||||
extern "C" {
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/prog_print.h"
|
||||
}
|
||||
using namespace brw;
|
||||
|
||||
void
|
||||
vec4_visitor::emit_vp_sop(uint32_t conditional_mod,
|
||||
dst_reg dst, src_reg src0, src_reg src1,
|
||||
src_reg one)
|
||||
{
|
||||
vec4_instruction *inst;
|
||||
|
||||
inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
|
||||
inst->conditional_mod = conditional_mod;
|
||||
|
||||
inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reswizzle a given source register.
|
||||
* \sa brw_swizzle().
|
||||
*/
|
||||
static inline src_reg
|
||||
reswizzle(src_reg orig, unsigned x, unsigned y, unsigned z, unsigned w)
|
||||
{
|
||||
src_reg t = orig;
|
||||
t.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(orig.swizzle, x),
|
||||
BRW_GET_SWZ(orig.swizzle, y),
|
||||
BRW_GET_SWZ(orig.swizzle, z),
|
||||
BRW_GET_SWZ(orig.swizzle, w));
|
||||
return t;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::emit_vertex_program_code()
|
||||
{
|
||||
this->need_all_constants_in_pull_buffer = false;
|
||||
|
||||
setup_vp_regs();
|
||||
|
||||
/* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just
|
||||
* be:
|
||||
*
|
||||
* sel.f0 dst 1.0 0.0
|
||||
*
|
||||
* instead of
|
||||
*
|
||||
* mov dst 0.0
|
||||
* mov.f0 dst 1.0
|
||||
*/
|
||||
src_reg one = src_reg(this, glsl_type::float_type);
|
||||
emit(MOV(dst_reg(one), src_reg(1.0f)));
|
||||
|
||||
for (unsigned int insn = 0; insn < vp->Base.NumInstructions; insn++) {
|
||||
const struct prog_instruction *vpi = &vp->Base.Instructions[insn];
|
||||
base_ir = vpi;
|
||||
|
||||
dst_reg dst;
|
||||
src_reg src[3];
|
||||
|
||||
/* We always emit into a temporary destination register to avoid
|
||||
* aliasing issues.
|
||||
*/
|
||||
dst = dst_reg(this, glsl_type::vec4_type);
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
src[i] = get_vp_src_reg(vpi->SrcReg[i]);
|
||||
|
||||
switch (vpi->Opcode) {
|
||||
case OPCODE_ABS:
|
||||
src[0].abs = true;
|
||||
src[0].negate = false;
|
||||
emit(MOV(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_ADD:
|
||||
emit(ADD(dst, src[0], src[1]));
|
||||
break;
|
||||
|
||||
case OPCODE_ARL:
|
||||
if (intel->gen >= 6) {
|
||||
dst.writemask = WRITEMASK_X;
|
||||
dst_reg dst_f = dst;
|
||||
dst_f.type = BRW_REGISTER_TYPE_F;
|
||||
|
||||
emit(RNDD(dst_f, src[0]));
|
||||
emit(MOV(dst, src_reg(dst_f)));
|
||||
} else {
|
||||
emit(RNDD(dst, src[0]));
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_DP3:
|
||||
emit(DP3(dst, src[0], src[1]));
|
||||
break;
|
||||
case OPCODE_DP4:
|
||||
emit(DP4(dst, src[0], src[1]));
|
||||
break;
|
||||
case OPCODE_DPH:
|
||||
emit(DPH(dst, src[0], src[1]));
|
||||
break;
|
||||
|
||||
case OPCODE_DST: {
|
||||
dst_reg t = dst;
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_X) {
|
||||
t.writemask = WRITEMASK_X;
|
||||
emit(MOV(t, src_reg(1.0f)));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
t.writemask = WRITEMASK_Y;
|
||||
emit(MUL(t, src[0], src[1]));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
t.writemask = WRITEMASK_Z;
|
||||
emit(MOV(t, src[0]));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
|
||||
t.writemask = WRITEMASK_W;
|
||||
emit(MOV(t, src[1]));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_EXP: {
|
||||
dst_reg result = dst;
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_X) {
|
||||
/* tmp_d = floor(src[0].x) */
|
||||
src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
|
||||
assert(tmp_d.type == BRW_REGISTER_TYPE_D);
|
||||
emit(RNDD(dst_reg(tmp_d), reswizzle(src[0], 0, 0, 0, 0)));
|
||||
|
||||
/* result[0] = 2.0 ^ tmp */
|
||||
/* Adjust exponent for floating point: exp += 127 */
|
||||
dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
|
||||
emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
|
||||
|
||||
/* Install exponent and sign. Excess drops off the edge: */
|
||||
dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
|
||||
emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
result.writemask = WRITEMASK_Y;
|
||||
emit(FRC(result, src[0]));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
result.writemask = WRITEMASK_Z;
|
||||
emit_math(SHADER_OPCODE_EXP2, result, src[0]);
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
|
||||
result.writemask = WRITEMASK_W;
|
||||
emit(MOV(result, src_reg(1.0f)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_EX2:
|
||||
emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_FLR:
|
||||
emit(RNDD(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_FRC:
|
||||
emit(FRC(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_LG2:
|
||||
emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_LIT: {
|
||||
dst_reg result = dst;
|
||||
/* From the ARB_vertex_program spec:
|
||||
*
|
||||
* tmp = VectorLoad(op0);
|
||||
* if (tmp.x < 0) tmp.x = 0;
|
||||
* if (tmp.y < 0) tmp.y = 0;
|
||||
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
|
||||
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
|
||||
* result.x = 1.0;
|
||||
* result.y = tmp.x;
|
||||
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
|
||||
* result.w = 1.0;
|
||||
*
|
||||
* Note that we don't do the clamping to +/- 128. We didn't in
|
||||
* brw_vs_emit.c either.
|
||||
*/
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
|
||||
result.writemask = WRITEMASK_XW;
|
||||
emit(MOV(result, src_reg(1.0f)));
|
||||
}
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
|
||||
result.writemask = WRITEMASK_YZ;
|
||||
emit(MOV(result, src_reg(0.0f)));
|
||||
|
||||
src_reg tmp_x = reswizzle(src[0], 0, 0, 0, 0);
|
||||
|
||||
emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
result.writemask = WRITEMASK_Y;
|
||||
emit(MOV(result, tmp_x));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
/* if (tmp.y < 0) tmp.y = 0; */
|
||||
src_reg tmp_y = reswizzle(src[0], 1, 1, 1, 1);
|
||||
result.writemask = WRITEMASK_Z;
|
||||
emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
|
||||
|
||||
src_reg clamped_y(result);
|
||||
clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
|
||||
|
||||
src_reg tmp_w = reswizzle(src[0], 3, 3, 3, 3);
|
||||
|
||||
emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
|
||||
}
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_LOG: {
|
||||
dst_reg result = dst;
|
||||
result.type = BRW_REGISTER_TYPE_UD;
|
||||
src_reg result_src = src_reg(result);
|
||||
|
||||
src_reg arg0_ud = reswizzle(src[0], 0, 0, 0, 0);
|
||||
arg0_ud.type = BRW_REGISTER_TYPE_UD;
|
||||
|
||||
/* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
|
||||
* according to spec:
|
||||
*
|
||||
* These almost look likey they could be joined up, but not really
|
||||
* practical:
|
||||
*
|
||||
* result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
|
||||
* result[1].i = (x.i & ((1<<23)-1) + (127<<23)
|
||||
*/
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
|
||||
result.writemask = WRITEMASK_X;
|
||||
emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
|
||||
emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
|
||||
src_reg result_d(result_src);
|
||||
result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
emit(ADD(result, result_d, src_reg(-127)));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
|
||||
result.writemask = WRITEMASK_Y;
|
||||
result.type = BRW_REGISTER_TYPE_UD;
|
||||
emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
|
||||
emit(OR(result, result_src, src_reg(127u << 23)));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
/* result[2] = result[0] + LOG2(result[1]); */
|
||||
|
||||
/* Why bother? The above is just a hint how to do this with a
|
||||
* taylor series. Maybe we *should* use a taylor series as by
|
||||
* the time all the above has been done it's almost certainly
|
||||
* quicker than calling the mathbox, even with low precision.
|
||||
*
|
||||
* Options are:
|
||||
* - result[0] + mathbox.LOG2(result[1])
|
||||
* - mathbox.LOG2(arg0.x)
|
||||
* - result[0] + inline_taylor_approx(result[1])
|
||||
*/
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
result.writemask = WRITEMASK_Z;
|
||||
src_reg result_x(result), result_y(result), result_z(result);
|
||||
result_x.swizzle = BRW_SWIZZLE_XXXX;
|
||||
result_y.swizzle = BRW_SWIZZLE_YYYY;
|
||||
result_z.swizzle = BRW_SWIZZLE_ZZZZ;
|
||||
emit_math(SHADER_OPCODE_LOG2, result, result_y);
|
||||
emit(ADD(result, result_z, result_x));
|
||||
}
|
||||
|
||||
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
result.writemask = WRITEMASK_W;
|
||||
emit(MOV(result, src_reg(1.0f)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_MAD: {
|
||||
src_reg temp = src_reg(this, glsl_type::vec4_type);
|
||||
emit(MUL(dst_reg(temp), src[0], src[1]));
|
||||
emit(ADD(dst, temp, src[2]));
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_MAX:
|
||||
emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_MIN:
|
||||
emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_MOV:
|
||||
emit(MOV(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_MUL:
|
||||
emit(MUL(dst, src[0], src[1]));
|
||||
break;
|
||||
|
||||
case OPCODE_POW:
|
||||
emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_RCP:
|
||||
emit_math(SHADER_OPCODE_RCP, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_RSQ:
|
||||
emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_SGE:
|
||||
emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
|
||||
break;
|
||||
|
||||
case OPCODE_SLT:
|
||||
emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
|
||||
break;
|
||||
|
||||
case OPCODE_SUB: {
|
||||
src_reg neg_src1 = src[1];
|
||||
neg_src1.negate = !src[1].negate;
|
||||
emit(ADD(dst, src[0], neg_src1));
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_SWZ:
|
||||
/* Note that SWZ's extended swizzles are handled in the general
|
||||
* get_src_reg() code.
|
||||
*/
|
||||
emit(MOV(dst, src[0]));
|
||||
break;
|
||||
|
||||
case OPCODE_XPD: {
|
||||
src_reg t1 = src_reg(this, glsl_type::vec4_type);
|
||||
src_reg t2 = src_reg(this, glsl_type::vec4_type);
|
||||
|
||||
emit(MUL(dst_reg(t1),
|
||||
reswizzle(src[0], 1, 2, 0, 3),
|
||||
reswizzle(src[1], 2, 0, 1, 3)));
|
||||
emit(MUL(dst_reg(t2),
|
||||
reswizzle(src[0], 2, 0, 1, 3),
|
||||
reswizzle(src[1], 1, 2, 0, 3)));
|
||||
t2.negate = true;
|
||||
emit(ADD(dst, t1, t2));
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_END:
|
||||
break;
|
||||
|
||||
default:
|
||||
_mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
|
||||
_mesa_opcode_string(vpi->Opcode));
|
||||
}
|
||||
|
||||
/* Copy the temporary back into the actual destination register. */
|
||||
if (vpi->Opcode != OPCODE_END) {
|
||||
emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
|
||||
}
|
||||
}
|
||||
|
||||
/* If we used relative addressing, we need to upload all constants as
|
||||
* pull constants. Do that now.
|
||||
*/
|
||||
if (this->need_all_constants_in_pull_buffer) {
|
||||
const struct gl_program_parameter_list *params = c->vp->program.Base.Parameters;
|
||||
unsigned i;
|
||||
for (i = 0; i < params->NumParameters * 4; i++) {
|
||||
c->prog_data.pull_param[i] = ¶ms->ParameterValues[i / 4][i % 4].f;
|
||||
}
|
||||
c->prog_data.nr_pull_params = i;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::setup_vp_regs()
|
||||
{
|
||||
/* PROGRAM_TEMPORARY */
|
||||
int num_temp = vp->Base.NumTemporaries;
|
||||
vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
|
||||
for (int i = 0; i < num_temp; i++)
|
||||
vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
|
||||
|
||||
/* PROGRAM_STATE_VAR etc. */
|
||||
struct gl_program_parameter_list *plist = c->vp->program.Base.Parameters;
|
||||
for (unsigned p = 0; p < plist->NumParameters; p++) {
|
||||
unsigned components = plist->Parameters[p].Size;
|
||||
|
||||
/* Parameters should be either vec4 uniforms or single component
|
||||
* constants; matrices and other larger types should have been broken
|
||||
* down earlier.
|
||||
*/
|
||||
assert(components <= 4);
|
||||
|
||||
this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
|
||||
this->uniform_vector_size[this->uniforms] = components;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
c->prog_data.param[this->uniforms * 4 + i] = i >= components ? 0 :
|
||||
&plist->ParameterValues[p][i].f;
|
||||
}
|
||||
this->uniforms++; /* counted in vec4 units */
|
||||
}
|
||||
|
||||
/* PROGRAM_OUTPUT */
|
||||
for (int slot = 0; slot < c->prog_data.vue_map.num_slots; slot++) {
|
||||
int vert_result = c->prog_data.vue_map.slot_to_vert_result[slot];
|
||||
if (vert_result == VERT_RESULT_PSIZ)
|
||||
output_reg[vert_result] = dst_reg(this, glsl_type::float_type);
|
||||
else
|
||||
output_reg[vert_result] = dst_reg(this, glsl_type::vec4_type);
|
||||
assert(output_reg[vert_result].type == BRW_REGISTER_TYPE_F);
|
||||
}
|
||||
|
||||
/* PROGRAM_ADDRESS */
|
||||
this->vp_addr_reg = src_reg(this, glsl_type::int_type);
|
||||
assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
|
||||
}
|
||||
|
||||
dst_reg
|
||||
vec4_visitor::get_vp_dst_reg(const prog_dst_register &dst)
|
||||
{
|
||||
dst_reg result;
|
||||
|
||||
assert(!dst.RelAddr);
|
||||
|
||||
switch (dst.File) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
result = dst_reg(vp_temp_regs[dst.Index]);
|
||||
break;
|
||||
|
||||
case PROGRAM_OUTPUT:
|
||||
result = output_reg[dst.Index];
|
||||
break;
|
||||
|
||||
case PROGRAM_ADDRESS: {
|
||||
assert(dst.Index == 0);
|
||||
result = dst_reg(this->vp_addr_reg);
|
||||
break;
|
||||
}
|
||||
|
||||
case PROGRAM_UNDEFINED:
|
||||
return dst_null_f();
|
||||
|
||||
default:
|
||||
assert("vec4_vp: bad destination register file");
|
||||
return dst_reg(this, glsl_type::vec4_type);
|
||||
}
|
||||
|
||||
result.writemask = dst.WriteMask;
|
||||
return result;
|
||||
}
|
||||
|
||||
src_reg
|
||||
vec4_visitor::get_vp_src_reg(const prog_src_register &src)
|
||||
{
|
||||
struct gl_program_parameter_list *plist = c->vp->program.Base.Parameters;
|
||||
|
||||
src_reg result;
|
||||
|
||||
assert(!src.Abs);
|
||||
|
||||
switch (src.File) {
|
||||
case PROGRAM_UNDEFINED:
|
||||
return src_reg(brw_null_reg());
|
||||
|
||||
case PROGRAM_TEMPORARY:
|
||||
result = vp_temp_regs[src.Index];
|
||||
break;
|
||||
|
||||
case PROGRAM_INPUT:
|
||||
result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
break;
|
||||
|
||||
case PROGRAM_ADDRESS: {
|
||||
assert(src.Index == 0);
|
||||
result = this->vp_addr_reg;
|
||||
break;
|
||||
}
|
||||
|
||||
case PROGRAM_STATE_VAR:
|
||||
case PROGRAM_CONSTANT:
|
||||
/* From the ARB_vertex_program specification:
|
||||
* "Relative addressing can only be used for accessing program
|
||||
* parameter arrays."
|
||||
*/
|
||||
if (src.RelAddr) {
|
||||
/* Since we have no idea what the base of the array is, we need to
|
||||
* upload ALL constants as push constants.
|
||||
*/
|
||||
this->need_all_constants_in_pull_buffer = true;
|
||||
|
||||
/* Add the small constant index to the address register */
|
||||
src_reg reladdr = src_reg(this, glsl_type::int_type);
|
||||
dst_reg dst_reladdr = dst_reg(reladdr);
|
||||
dst_reladdr.writemask = WRITEMASK_X;
|
||||
emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
|
||||
|
||||
if (intel->gen < 6)
|
||||
emit(MUL(dst_reladdr, reladdr, src_reg(16)));
|
||||
|
||||
#if 0
|
||||
assert(src.Index < this->uniforms);
|
||||
result = src_reg(dst_reg(UNIFORM, 0));
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
result.reladdr = new(mem_ctx) src_reg();
|
||||
memcpy(result.reladdr, &reladdr, sizeof(src_reg));
|
||||
#endif
|
||||
|
||||
result = src_reg(this, glsl_type::vec4_type);
|
||||
src_reg surf_index = src_reg(unsigned(SURF_INDEX_VERT_CONST_BUFFER));
|
||||
vec4_instruction *load =
|
||||
new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
|
||||
dst_reg(result), surf_index, reladdr);
|
||||
load->base_mrf = 14;
|
||||
load->mlen = 1;
|
||||
emit(load);
|
||||
break;
|
||||
}
|
||||
|
||||
/* We actually want to look at the type in the Parameters list for this,
|
||||
* because this lets us upload constant builtin uniforms as actual
|
||||
* constants.
|
||||
*/
|
||||
switch (plist->Parameters[src.Index].Type) {
|
||||
case PROGRAM_CONSTANT:
|
||||
result = src_reg(this, glsl_type::vec4_type);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
dst_reg t = dst_reg(result);
|
||||
t.writemask = 1 << i;
|
||||
emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
|
||||
}
|
||||
break;
|
||||
|
||||
case PROGRAM_STATE_VAR:
|
||||
assert(src.Index < this->uniforms);
|
||||
result = src_reg(dst_reg(UNIFORM, src.Index));
|
||||
result.type = BRW_REGISTER_TYPE_F;
|
||||
break;
|
||||
|
||||
default:
|
||||
_mesa_problem(ctx, "bad uniform src register file: %s\n",
|
||||
_mesa_register_file_name((gl_register_file)src.File));
|
||||
return src_reg(this, glsl_type::vec4_type);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
_mesa_problem(ctx, "bad src register file: %s\n",
|
||||
_mesa_register_file_name((gl_register_file)src.File));
|
||||
return src_reg(this, glsl_type::vec4_type);
|
||||
}
|
||||
|
||||
if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
|
||||
unsigned short zeros_mask = 0;
|
||||
unsigned short ones_mask = 0;
|
||||
unsigned short src_mask = 0;
|
||||
unsigned short src_swiz[4];
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src_swiz[i] = 0; /* initialize for safety */
|
||||
|
||||
/* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
|
||||
* but it's simplest to handle it here.
|
||||
*/
|
||||
int s = GET_SWZ(src.Swizzle, i);
|
||||
switch (s) {
|
||||
case SWIZZLE_X:
|
||||
case SWIZZLE_Y:
|
||||
case SWIZZLE_Z:
|
||||
case SWIZZLE_W:
|
||||
src_mask |= 1 << i;
|
||||
src_swiz[i] = s;
|
||||
break;
|
||||
case SWIZZLE_ZERO:
|
||||
zeros_mask |= 1 << i;
|
||||
break;
|
||||
case SWIZZLE_ONE:
|
||||
ones_mask |= 1 << i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result.swizzle =
|
||||
BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
|
||||
|
||||
/* The hardware doesn't natively handle the SWZ instruction's zero/one
|
||||
* swizzles or per-component negation, so we need to use a temporary.
|
||||
*/
|
||||
if (zeros_mask || ones_mask || src.Negate) {
|
||||
src_reg temp_src(this, glsl_type::vec4_type);
|
||||
dst_reg temp(temp_src);
|
||||
|
||||
if (src_mask) {
|
||||
temp.writemask = src_mask;
|
||||
emit(MOV(temp, result));
|
||||
}
|
||||
|
||||
if (zeros_mask) {
|
||||
temp.writemask = zeros_mask;
|
||||
emit(MOV(temp, src_reg(0.0f)));
|
||||
}
|
||||
|
||||
if (ones_mask) {
|
||||
temp.writemask = ones_mask;
|
||||
emit(MOV(temp, src_reg(1.0f)));
|
||||
}
|
||||
|
||||
if (src.Negate) {
|
||||
temp.writemask = src.Negate;
|
||||
src_reg neg(temp_src);
|
||||
neg.negate = true;
|
||||
emit(MOV(temp, neg));
|
||||
}
|
||||
result = temp_src;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
|
@ -245,11 +245,12 @@ do_vs_prog(struct brw_context *brw,
|
|||
*/
|
||||
param_count = vs->num_uniform_components * 4;
|
||||
|
||||
/* We also upload clip plane data as uniforms */
|
||||
param_count += MAX_CLIP_PLANES * 4;
|
||||
} else {
|
||||
param_count = vp->program.Base.Parameters->NumParameters * 4;
|
||||
}
|
||||
/* We also upload clip plane data as uniforms */
|
||||
param_count += MAX_CLIP_PLANES * 4;
|
||||
|
||||
c.prog_data.param = rzalloc_array(NULL, const float *, param_count);
|
||||
c.prog_data.pull_param = rzalloc_array(NULL, const float *, param_count);
|
||||
|
||||
|
@ -281,13 +282,9 @@ do_vs_prog(struct brw_context *brw,
|
|||
|
||||
/* Emit GEN4 code.
|
||||
*/
|
||||
if (prog) {
|
||||
if (!brw_vs_emit(prog, &c)) {
|
||||
ralloc_free(mem_ctx);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
brw_old_vs_emit(&c);
|
||||
if (!brw_vs_emit(prog, &c)) {
|
||||
ralloc_free(mem_ctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (c.prog_data.nr_pull_params)
|
||||
|
|
Loading…
Reference in New Issue